github.com/weaviate/weaviate@v1.24.6/usecases/schema/migrate/fs/file_structure_migration.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package fs
    13  
    14  import (
    15  	"fmt"
    16  	"os"
    17  	"path"
    18  	"path/filepath"
    19  	"strings"
    20  
    21  	"github.com/weaviate/weaviate/adapters/repos/db/helpers"
    22  	entschema "github.com/weaviate/weaviate/entities/schema"
    23  	"github.com/weaviate/weaviate/usecases/sharding"
    24  )
    25  
    26  const vectorIndexCommitLog = `hnsw.commitlog.d`
    27  
    28  func MigrateToHierarchicalFS(rootPath string, s schemaGetter) error {
    29  	root, err := os.ReadDir(rootPath)
    30  	if err != nil {
    31  		return fmt.Errorf("read source path %q: %w", rootPath, err)
    32  	}
    33  	fm := newFileMatcher(s, rootPath)
    34  	plan, err := assembleFSMigrationPlan(root, rootPath, fm)
    35  	if err != nil {
    36  		return err
    37  	}
    38  
    39  	for newRoot, parts := range plan.partsByShard {
    40  		for _, part := range parts {
    41  			newPath := path.Join(newRoot, part.newRelPath)
    42  			absDir, _ := filepath.Split(newPath)
    43  			if err := os.MkdirAll(absDir, os.ModePerm); err != nil {
    44  				return fmt.Errorf("mkdir %q: %w", absDir, err)
    45  			}
    46  			if err = os.Rename(part.oldAbsPath, newPath); err != nil {
    47  				return fmt.Errorf("mv %s %s: %w", part.oldAbsPath, newPath, err)
    48  			}
    49  		}
    50  	}
    51  
    52  	return nil
    53  }
    54  
    55  type migrationPart struct {
    56  	oldAbsPath string
    57  	newRelPath string
    58  }
    59  
    60  type shardRoot = string
    61  
    62  type migrationPlan struct {
    63  	rootPath     string
    64  	partsByShard map[shardRoot][]migrationPart
    65  }
    66  
    67  func newMigrationPlan(rootPath string) *migrationPlan {
    68  	return &migrationPlan{rootPath: rootPath, partsByShard: make(map[string][]migrationPart)}
    69  }
    70  
    71  func (p *migrationPlan) append(class, shard, oldRootRelPath, newShardRelPath string) {
    72  	shardRoot := path.Join(p.rootPath, strings.ToLower(class), shard)
    73  	p.partsByShard[shardRoot] = append(p.partsByShard[shardRoot], migrationPart{
    74  		oldAbsPath: path.Join(p.rootPath, oldRootRelPath),
    75  		newRelPath: newShardRelPath,
    76  	})
    77  }
    78  
    79  func (p *migrationPlan) prepend(class, shard, oldRootRelPath, newShardRelPath string) {
    80  	shardRoot := path.Join(p.rootPath, strings.ToLower(class), shard)
    81  	p.partsByShard[shardRoot] = append([]migrationPart{{
    82  		oldAbsPath: path.Join(p.rootPath, oldRootRelPath),
    83  		newRelPath: newShardRelPath,
    84  	}}, p.partsByShard[shardRoot]...)
    85  }
    86  
    87  func assembleFSMigrationPlan(entries []os.DirEntry, rootPath string, fm *fileMatcher) (*migrationPlan, error) {
    88  	plan := newMigrationPlan(rootPath)
    89  
    90  	for _, entry := range entries {
    91  		if ok, cs := fm.isShardLsmDir(entry); ok {
    92  			// make sure lsm dir is moved first, otherwise os.Rename may fail
    93  			// if directory already exists (created by other files/dirs moved before)
    94  			plan.prepend(cs.class, cs.shard,
    95  				entry.Name(),
    96  				"lsm")
    97  		} else if ok, cs, suffix := fm.isShardFile(entry); ok {
    98  			plan.append(cs.class, cs.shard,
    99  				entry.Name(),
   100  				suffix)
   101  		} else if ok, cs := fm.isShardCommitLogDir(entry); ok {
   102  			plan.append(cs.class, cs.shard,
   103  				entry.Name(),
   104  				fmt.Sprintf("main.%s", vectorIndexCommitLog))
   105  		} else if ok, csp := fm.isShardGeoCommitLogDir(entry); ok {
   106  			plan.append(csp.class, csp.shard,
   107  				entry.Name(),
   108  				fmt.Sprintf("geo.%s.%s", csp.geoProp, vectorIndexCommitLog))
   109  		} else if ok, css := fm.isPqDir(entry); ok {
   110  			for _, cs := range css {
   111  				plan.append(cs.class, cs.shard,
   112  					path.Join(strings.ToLower(entry.Name()), cs.shard, "compressed_objects"),
   113  					path.Join("lsm", helpers.VectorsCompressedBucketLSM))
   114  			}
   115  
   116  			// explicitly rename Class directory starting with uppercase to lowercase
   117  			// as MkdirAll will not create lowercased dir if uppercased one exists
   118  			oldClassRoot := path.Join(rootPath, entry.Name())
   119  			newClassRoot := path.Join(rootPath, strings.ToLower(entry.Name()))
   120  			if err := os.Rename(oldClassRoot, newClassRoot); err != nil {
   121  				return nil, fmt.Errorf(
   122  					"rename pq index dir to avoid collision, old: %q, new: %q, err: %w",
   123  					oldClassRoot, newClassRoot, err)
   124  			}
   125  		}
   126  	}
   127  	return plan, nil
   128  }
   129  
   130  type classShard struct {
   131  	class string
   132  	shard string
   133  }
   134  
   135  type classShardGeoProp struct {
   136  	class   string
   137  	shard   string
   138  	geoProp string
   139  }
   140  
   141  type fileMatcher struct {
   142  	rootPath            string
   143  	shardLsmDirs        map[string]*classShard
   144  	shardFilePrefixes   map[string]*classShard
   145  	shardGeoDirPrefixes map[string]*classShardGeoProp
   146  	classes             map[string][]*classShard
   147  }
   148  
   149  type schemaGetter interface {
   150  	CopyShardingState(class string) *sharding.State
   151  	GetSchemaSkipAuth() entschema.Schema
   152  }
   153  
   154  func newFileMatcher(schemaGetter schemaGetter, rootPath string) *fileMatcher {
   155  	shardLsmDirs := make(map[string]*classShard)
   156  	shardFilePrefixes := make(map[string]*classShard)
   157  	shardGeoDirPrefixes := make(map[string]*classShardGeoProp)
   158  	classes := make(map[string][]*classShard)
   159  
   160  	sch := schemaGetter.GetSchemaSkipAuth()
   161  	for _, class := range sch.Objects.Classes {
   162  		shards := schemaGetter.CopyShardingState(class.Class).AllLocalPhysicalShards()
   163  		lowercasedClass := strings.ToLower(class.Class)
   164  
   165  		var geoProps []string
   166  		for _, prop := range class.Properties {
   167  			if dt, ok := entschema.AsPrimitive(prop.DataType); ok && dt == entschema.DataTypeGeoCoordinates {
   168  				geoProps = append(geoProps, prop.Name)
   169  			}
   170  		}
   171  
   172  		classes[class.Class] = make([]*classShard, 0, len(shards))
   173  		for _, shard := range shards {
   174  			cs := &classShard{class: class.Class, shard: shard}
   175  			shardLsmDirs[fmt.Sprintf("%s_%s_lsm", lowercasedClass, shard)] = cs
   176  			shardFilePrefixes[fmt.Sprintf("%s_%s", lowercasedClass, shard)] = cs
   177  			classes[class.Class] = append(classes[class.Class], cs)
   178  
   179  			for _, geoProp := range geoProps {
   180  				csp := &classShardGeoProp{class: class.Class, shard: shard, geoProp: geoProp}
   181  				shardGeoDirPrefixes[fmt.Sprintf("%s_%s_%s", lowercasedClass, shard, geoProp)] = csp
   182  			}
   183  		}
   184  	}
   185  
   186  	return &fileMatcher{
   187  		rootPath:            rootPath,
   188  		shardLsmDirs:        shardLsmDirs,
   189  		shardFilePrefixes:   shardFilePrefixes,
   190  		shardGeoDirPrefixes: shardGeoDirPrefixes,
   191  		classes:             classes,
   192  	}
   193  }
   194  
   195  // Checks if entry is directory with name (class is lowercased):
   196  // class_shard_lsm
   197  func (fm *fileMatcher) isShardLsmDir(entry os.DirEntry) (bool, *classShard) {
   198  	if !entry.IsDir() {
   199  		return false, nil
   200  	}
   201  	if cs, ok := fm.shardLsmDirs[entry.Name()]; ok {
   202  		return true, cs
   203  	}
   204  	return false, nil
   205  }
   206  
   207  // Checks if entry is file with name (class is lowercased):
   208  // class_shard.*
   209  // (e.g. class_shard.version, class_shard.indexcount)
   210  func (fm *fileMatcher) isShardFile(entry os.DirEntry) (bool, *classShard, string) {
   211  	if !entry.Type().IsRegular() {
   212  		return false, nil, ""
   213  	}
   214  	parts := strings.SplitN(entry.Name(), ".", 2)
   215  	if len(parts) != 2 {
   216  		return false, nil, ""
   217  	}
   218  	if cs, ok := fm.shardFilePrefixes[parts[0]]; ok {
   219  		return true, cs, parts[1]
   220  	}
   221  	return false, nil, ""
   222  }
   223  
   224  // Checks if entry is directory with name (class is lowercased):
   225  // class_shard.hnsw.commitlog.d
   226  func (fm *fileMatcher) isShardCommitLogDir(entry os.DirEntry) (bool, *classShard) {
   227  	if !entry.IsDir() {
   228  		return false, nil
   229  	}
   230  	parts := strings.SplitN(entry.Name(), ".", 2)
   231  	if len(parts) != 2 {
   232  		return false, nil
   233  	}
   234  	if parts[1] != vectorIndexCommitLog {
   235  		return false, nil
   236  	}
   237  	if cs, ok := fm.shardFilePrefixes[parts[0]]; ok {
   238  		return true, cs
   239  	}
   240  	return false, nil
   241  }
   242  
   243  // Checks if entry is directory with name (class is lowercased):
   244  // class_shard_prop.hnsw.commitlog.d
   245  func (fm *fileMatcher) isShardGeoCommitLogDir(entry os.DirEntry) (bool, *classShardGeoProp) {
   246  	if !entry.IsDir() {
   247  		return false, nil
   248  	}
   249  	parts := strings.SplitN(entry.Name(), ".", 2)
   250  	if len(parts) != 2 {
   251  		return false, nil
   252  	}
   253  	if parts[1] != vectorIndexCommitLog {
   254  		return false, nil
   255  	}
   256  	if csp, ok := fm.shardGeoDirPrefixes[parts[0]]; ok {
   257  		return true, csp
   258  	}
   259  	return false, nil
   260  }
   261  
   262  // Checks if entry is directory containing PQ index:
   263  // Class/shard/compressed_object
   264  func (fm *fileMatcher) isPqDir(entry os.DirEntry) (bool, []*classShard) {
   265  	if !entry.IsDir() {
   266  		return false, nil
   267  	}
   268  
   269  	resultcss := []*classShard{}
   270  	if css, ok := fm.classes[entry.Name()]; ok {
   271  		for _, cs := range css {
   272  			pqDir := path.Join(fm.rootPath, cs.class, cs.shard, "compressed_objects")
   273  			if info, err := os.Stat(pqDir); err == nil && info.IsDir() {
   274  				resultcss = append(resultcss, cs)
   275  			}
   276  		}
   277  		return true, resultcss
   278  	}
   279  	return false, nil
   280  }