github.com/weaviate/weaviate@v1.24.6/adapters/repos/schema/store.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package schema
    13  
    14  import (
    15  	"bytes"
    16  	"context"
    17  	"encoding/json"
    18  	"fmt"
    19  	"os"
    20  	"path"
    21  
    22  	enterrors "github.com/weaviate/weaviate/entities/errors"
    23  
    24  	"github.com/pkg/errors"
    25  	"github.com/sirupsen/logrus"
    26  	"github.com/weaviate/weaviate/entities/models"
    27  	ucs "github.com/weaviate/weaviate/usecases/schema"
    28  	"github.com/weaviate/weaviate/usecases/sharding"
    29  	bolt "go.etcd.io/bbolt"
    30  )
    31  
    32  var (
    33  	// old keys are still needed for migration
    34  	schemaBucket = []byte("schema")
    35  	schemaKey    = []byte("schema")
    36  	// static keys
    37  	keyMetaClass         = []byte{eTypeMeta, 0}
    38  	keyShardingState     = []byte{eTypeSharingState, 0}
    39  	keyConfig            = []byte{eTypeConfig, 0}
    40  	_Version         int = 2
    41  )
    42  
    43  // constant to encode the type of entry in the DB
    44  const (
    45  	eTypeConfig       byte = 1
    46  	eTypeClass        byte = 2
    47  	eTypeShard        byte = 4
    48  	eTypeMeta         byte = 5
    49  	eTypeSharingState byte = 15
    50  )
    51  
    52  // config configuration specific the stored schema
    53  type config struct {
    54  	Version int
    55  	// add more fields
    56  }
    57  
    58  /*
    59  Store is responsible for storing and persisting the schema in a structured manner.
    60  It ensures that each class has a dedicated bucket, which includes metadata, and sharding state.
    61  
    62  Schema Structure:
    63    - Config: contains metadata related to parsing the schema
    64    - Nested buckets for each class
    65  
    66  Schema Structure for a class Bucket:
    67    - Metadata contains models.Class
    68    - Sharding state without shards
    69    - Class shards: individual shard associated with the sharding state
    70  
    71  By organizing the schema in this manner, it facilitates efficient management of class specific data during runtime.
    72  In addition, old schema are backed up and migrated to the new structure for a seamless transitions
    73  */
    74  type store struct {
    75  	version int    // schema version
    76  	homeDir string // home directory of schema files
    77  	log     logrus.FieldLogger
    78  	db      *bolt.DB
    79  }
    80  
    81  // NewStore returns a new schema repository. Call the Open() method to open the underlying DB.
    82  // To free the resources, call the Close() method.
    83  func NewStore(homeDir string, logger logrus.FieldLogger) *store {
    84  	return &store{
    85  		version: _Version,
    86  		homeDir: homeDir,
    87  		log:     logger,
    88  	}
    89  }
    90  
    91  func initBoltDB(filePath string, version int, cfg *config) (*bolt.DB, error) {
    92  	db, err := bolt.Open(filePath, 0o600, nil)
    93  	if err != nil {
    94  		return nil, fmt.Errorf("open %q: %w", filePath, err)
    95  	}
    96  	root := func(tx *bolt.Tx) error {
    97  		b, err := tx.CreateBucket(schemaBucket)
    98  		// A new bucket has been created
    99  		if err == nil {
   100  			*cfg = config{Version: version}
   101  			return saveConfig(b, *cfg)
   102  		}
   103  		// load existing bucket
   104  		b = tx.Bucket(schemaBucket)
   105  		if b == nil {
   106  			return fmt.Errorf("retrieve existing bucket %q", schemaBucket)
   107  		}
   108  		// read config:  config exists since version 2
   109  		data := b.Get(keyConfig)
   110  		if len(data) > 0 {
   111  			if err := json.Unmarshal(data, &cfg); err != nil {
   112  				return fmt.Errorf("cannot read config: %w", err)
   113  			}
   114  		}
   115  		return nil
   116  	}
   117  
   118  	return db, db.Update(root)
   119  }
   120  
   121  // Open the underlying DB
   122  func (r *store) Open() (err error) {
   123  	if err := os.MkdirAll(r.homeDir, 0o777); err != nil {
   124  		return fmt.Errorf("create root directory %q: %w", r.homeDir, err)
   125  	}
   126  	cfg := config{}
   127  	path := path.Join(r.homeDir, "schema.db")
   128  	boltDB, err := initBoltDB(path, r.version, &cfg)
   129  	if err != nil {
   130  		return fmt.Errorf("init bolt_db: %w", err)
   131  	}
   132  	defer func() {
   133  		if err != nil {
   134  			boltDB.Close()
   135  		}
   136  	}()
   137  	r.db = boltDB
   138  	if cfg.Version < r.version {
   139  		if err := r.migrate(path, cfg.Version, r.version); err != nil {
   140  			return fmt.Errorf("migrate: %w", err)
   141  		}
   142  	}
   143  	if cfg.Version > r.version {
   144  		return fmt.Errorf("schema version %d higher than %d", cfg.Version, r.version)
   145  	}
   146  	return err
   147  }
   148  
   149  // Close the underlying DB
   150  func (r *store) Close() {
   151  	r.db.Close()
   152  }
   153  
   154  // migrate from old to new schema
   155  // It will back up the old schema file if it exists
   156  func (r *store) migrate(filePath string, from, to int) (err error) {
   157  	r.log.Infof("schema migration from v%d to v%d process has started", from, to)
   158  	defer func() {
   159  		if err == nil {
   160  			r.log.Infof("successfully completed schema migration from v%d to v%d", from, to)
   161  		}
   162  	}()
   163  	state, err := r.loadSchemaV1()
   164  	if err != nil {
   165  		return fmt.Errorf("load old schema: %w", err)
   166  	}
   167  	if state != nil {
   168  		// create backupPath by copying file
   169  		backupPath := fmt.Sprintf("%s_v%d.bak", filePath, from)
   170  		if err := copyFile(backupPath, filePath); err != nil {
   171  			return fmt.Errorf("schema backup: %w", err)
   172  		}
   173  
   174  		// write new schema
   175  		f := func(tx *bolt.Tx) error {
   176  			b := tx.Bucket(schemaBucket)
   177  			if err := saveConfig(b, config{Version: to}); err != nil {
   178  				return err
   179  			}
   180  			b.Delete(schemaKey) // remove old schema
   181  			return r.saveAllTx(context.Background(), b, *state)(tx)
   182  		}
   183  		if err := r.db.Update(f); err != nil {
   184  			os.Remove(backupPath)
   185  			return fmt.Errorf("convert to new schema: %w", err)
   186  		}
   187  	}
   188  	return nil
   189  }
   190  
   191  // saveSchemaV1 might be needed to migrate from v2 to v0
   192  func (r *store) saveSchemaV1(schema ucs.State) error {
   193  	schemaJSON, err := json.Marshal(schema)
   194  	if err != nil {
   195  		return errors.Wrapf(err, "marshal schema state to json")
   196  	}
   197  
   198  	return r.db.Update(func(tx *bolt.Tx) error {
   199  		b := tx.Bucket(schemaBucket)
   200  		return b.Put(schemaKey, schemaJSON)
   201  	})
   202  }
   203  
   204  // loadSchemaV1 is needed to migrate from v0 to v2
   205  func (r *store) loadSchemaV1() (*ucs.State, error) {
   206  	var schemaJSON []byte
   207  	r.db.View(func(tx *bolt.Tx) error {
   208  		b := tx.Bucket(schemaBucket)
   209  		schemaJSON = b.Get(schemaKey)
   210  		return nil
   211  	})
   212  
   213  	if len(schemaJSON) == 0 {
   214  		return nil, nil
   215  	}
   216  
   217  	var state ucs.State
   218  	err := json.Unmarshal(schemaJSON, &state)
   219  	if err != nil {
   220  		return nil, errors.Wrapf(err, "parse schema state from JSON")
   221  	}
   222  
   223  	return &state, nil
   224  }
   225  
   226  // UpdateClass if it exists, otherwise return an error.
   227  func (r *store) UpdateClass(_ context.Context, data ucs.ClassPayload) error {
   228  	classKey := encodeClassName(data.Name)
   229  	f := func(tx *bolt.Tx) error {
   230  		b := tx.Bucket(schemaBucket).Bucket(classKey)
   231  		if b == nil {
   232  			return fmt.Errorf("class not found")
   233  		}
   234  		return r.updateClass(b, data)
   235  	}
   236  	return r.db.Update(f)
   237  }
   238  
   239  // NewClass creates a new class if it doesn't exists, otherwise return an error
   240  func (r *store) NewClass(_ context.Context, data ucs.ClassPayload) error {
   241  	classKey := encodeClassName(data.Name)
   242  	f := func(tx *bolt.Tx) error {
   243  		b, err := tx.Bucket(schemaBucket).CreateBucket(classKey)
   244  		if err != nil {
   245  			return err
   246  		}
   247  		return r.updateClass(b, data)
   248  	}
   249  	return r.db.Update(f)
   250  }
   251  
   252  func (r *store) updateClass(b *bolt.Bucket, data ucs.ClassPayload) error {
   253  	// remove old shards
   254  	if data.ReplaceShards {
   255  		cursor := b.Cursor() // b.Put before
   256  		for key, _ := cursor.First(); key != nil; {
   257  			if key[0] == eTypeShard {
   258  				b.Delete(key)
   259  			}
   260  			key, _ = cursor.Next()
   261  		}
   262  	}
   263  	if data.Metadata != nil {
   264  		if err := b.Put(keyMetaClass, data.Metadata); err != nil {
   265  			return err
   266  		}
   267  	}
   268  
   269  	if data.ShardingState != nil {
   270  		if err := b.Put(keyShardingState, data.ShardingState); err != nil {
   271  			return err
   272  		}
   273  	}
   274  
   275  	return appendShards(b, data.Shards, make([]byte, 1, 68))
   276  }
   277  
   278  // DeleteClass class
   279  func (r *store) DeleteClass(_ context.Context, class string) error {
   280  	classKey := encodeClassName(class)
   281  	f := func(tx *bolt.Tx) error {
   282  		err := tx.Bucket(schemaBucket).DeleteBucket(classKey)
   283  		if err != nil && !errors.Is(err, bolt.ErrBucketNotFound) {
   284  			return err
   285  		}
   286  		return nil
   287  	}
   288  	return r.db.Update(f)
   289  }
   290  
   291  // NewShards add new shards to an existing class
   292  func (r *store) NewShards(_ context.Context, class string, shards []ucs.KeyValuePair) error {
   293  	classKey := encodeClassName(class)
   294  	f := func(tx *bolt.Tx) error {
   295  		b := tx.Bucket(schemaBucket).Bucket(classKey)
   296  		if b == nil {
   297  			return fmt.Errorf("class not found")
   298  		}
   299  		return appendShards(b, shards, make([]byte, 1, 68))
   300  	}
   301  	return r.db.Update(f)
   302  }
   303  
   304  // Update shards updates (replaces) shards of existing class
   305  // Error is returned if class or shard does not exist
   306  func (r *store) UpdateShards(_ context.Context, class string, shards []ucs.KeyValuePair) error {
   307  	classKey := encodeClassName(class)
   308  	f := func(tx *bolt.Tx) error {
   309  		b := tx.Bucket(schemaBucket).Bucket(classKey)
   310  		if b == nil {
   311  			return fmt.Errorf("class not found")
   312  		}
   313  		keyBuf := make([]byte, 1, 68)
   314  		if !existShards(b, shards, keyBuf) {
   315  			return fmt.Errorf("shard not found")
   316  		}
   317  		return appendShards(b, shards, keyBuf)
   318  	}
   319  	return r.db.Update(f)
   320  }
   321  
   322  // DeleteShards of a specific class
   323  //
   324  //	If the class or a shard does not exist then nothing is done and a nil error is returned
   325  func (r *store) DeleteShards(_ context.Context, class string, shards []string) error {
   326  	classKey := encodeClassName(class)
   327  	f := func(tx *bolt.Tx) error {
   328  		b := tx.Bucket(schemaBucket).Bucket(classKey)
   329  		if b == nil {
   330  			return nil
   331  		}
   332  		return deleteShards(b, shards, make([]byte, 1, 68))
   333  	}
   334  	return r.db.Update(f)
   335  }
   336  
   337  // Load loads the complete schema from the persistent storage
   338  func (r *store) Load(ctx context.Context) (ucs.State, error) {
   339  	state := ucs.NewState(32)
   340  	for data := range r.load(ctx) {
   341  		if data.Error != nil {
   342  			return state, data.Error
   343  		}
   344  		cls := models.Class{Class: string(data.Name)}
   345  		ss := sharding.State{}
   346  
   347  		if err := json.Unmarshal(data.Metadata, &cls); err != nil {
   348  			return state, fmt.Errorf("unmarshal class %q", cls.Class)
   349  		}
   350  		if err := json.Unmarshal(data.ShardingState, &ss); err != nil {
   351  			return state, fmt.Errorf("unmarshal sharding state for class %q size %d",
   352  				cls.Class, len(data.ShardingState))
   353  		}
   354  		if n := len(data.Shards); n > 0 {
   355  			ss.Physical = make(map[string]sharding.Physical, n)
   356  		}
   357  		for _, shard := range data.Shards {
   358  			phy := sharding.Physical{}
   359  			name := string(shard.Key)
   360  			if err := json.Unmarshal(shard.Value, &phy); err != nil {
   361  				return state, fmt.Errorf("unmarshal shard %q for class %q", name, cls.Class)
   362  			}
   363  			ss.Physical[name] = phy
   364  		}
   365  		state.ObjectSchema.Classes = append(state.ObjectSchema.Classes, &cls)
   366  		state.ShardingState[cls.Class] = &ss
   367  	}
   368  	return state, nil
   369  }
   370  
   371  func (r *store) load(ctx context.Context) <-chan ucs.ClassPayload {
   372  	ch := make(chan ucs.ClassPayload, 1)
   373  	f := func(tx *bolt.Tx) (err error) {
   374  		root := tx.Bucket(schemaBucket)
   375  		rootCursor := root.Cursor()
   376  		for cls, _ := rootCursor.First(); cls != nil; {
   377  			if cls[0] != eTypeClass {
   378  				cls, _ = rootCursor.Next()
   379  				continue
   380  			}
   381  			if err := ctx.Err(); err != nil {
   382  				ch <- ucs.ClassPayload{Error: err}
   383  				return err
   384  			}
   385  			b := root.Bucket(cls)
   386  			if b == nil {
   387  				err := fmt.Errorf("class not found")
   388  				ch <- ucs.ClassPayload{Error: err}
   389  				return err
   390  			}
   391  			x := ucs.ClassPayload{
   392  				Name:   string(cls[1:]),
   393  				Shards: make([]ucs.KeyValuePair, 0, 32),
   394  			}
   395  			cursor := b.Cursor()
   396  			for key, value := cursor.First(); key != nil; {
   397  				if bytes.Equal(key, keyMetaClass) {
   398  					x.Metadata = value
   399  				} else if bytes.Equal(key, keyShardingState) {
   400  					x.ShardingState = value
   401  				} else {
   402  					x.Shards = append(x.Shards, ucs.KeyValuePair{Key: string(key[1:]), Value: value})
   403  				}
   404  				key, value = cursor.Next()
   405  			}
   406  			ch <- x
   407  			cls, _ = rootCursor.Next()
   408  		}
   409  		return nil
   410  	}
   411  	enterrors.GoWrapper(func() {
   412  		defer close(ch)
   413  		r.db.View(f)
   414  	}, r.log)
   415  	return ch
   416  }
   417  
   418  // Save saves the complete schema to the persistent storage
   419  func (r *store) Save(ctx context.Context, ss ucs.State) error {
   420  	if (ss.ObjectSchema == nil || len(ss.ObjectSchema.Classes) == 0) &&
   421  		len(ss.ShardingState) == 0 {
   422  		return nil // empty schema nothing to store
   423  	}
   424  
   425  	if ss.ObjectSchema == nil ||
   426  		len(ss.ObjectSchema.Classes) == 0 ||
   427  		len(ss.ShardingState) == 0 {
   428  		return fmt.Errorf("inconsistent schema: missing required fields")
   429  	}
   430  
   431  	f := func(tx *bolt.Tx) error {
   432  		root := tx.Bucket(schemaBucket)
   433  		return r.saveAllTx(ctx, root, ss)(tx)
   434  	}
   435  	return r.db.Update(f)
   436  }
   437  
   438  func (r *store) saveAllTx(ctx context.Context, root *bolt.Bucket, ss ucs.State) func(tx *bolt.Tx) error {
   439  	return func(tx *bolt.Tx) error {
   440  		rootCursor := root.Cursor()
   441  		for cls, _ := rootCursor.First(); cls != nil; {
   442  			if cls[0] == eTypeClass {
   443  				err := root.DeleteBucket(cls)
   444  				if err != nil && !errors.Is(err, bolt.ErrBucketNotFound) {
   445  					return err
   446  				}
   447  			}
   448  			cls, _ = rootCursor.Next()
   449  		}
   450  		for _, cls := range ss.ObjectSchema.Classes {
   451  			if err := ctx.Err(); err != nil {
   452  				return fmt.Errorf("context for class %q: %w", cls.Class, err)
   453  			}
   454  			sharding := ss.ShardingState[cls.Class]
   455  			payload, err := ucs.CreateClassPayload(cls, sharding)
   456  			if err != nil {
   457  				return fmt.Errorf("create payload for class %q: %w", cls.Class, err)
   458  			}
   459  			b, err := root.CreateBucket(encodeClassName(cls.Class))
   460  			if err != nil {
   461  				return fmt.Errorf("create bucket for class %q: %w", cls.Class, err)
   462  			}
   463  			if err := r.updateClass(b, payload); err != nil {
   464  				return fmt.Errorf("update bucket %q: %w", cls.Class, err)
   465  			}
   466  		}
   467  
   468  		return nil
   469  	}
   470  }
   471  
   472  func saveConfig(root *bolt.Bucket, cfg config) error {
   473  	data, err := json.Marshal(&cfg)
   474  	if err != nil {
   475  		return fmt.Errorf("marshal config: %w", err)
   476  	}
   477  	if err := root.Put(keyConfig, data); err != nil {
   478  		return fmt.Errorf("write config: %w", err)
   479  	}
   480  	return nil
   481  }
   482  
   483  func existShards(b *bolt.Bucket, shards []ucs.KeyValuePair, keyBuf []byte) bool {
   484  	keyBuf[0] = eTypeShard
   485  	for _, pair := range shards {
   486  		kLen := len(pair.Key) + 1
   487  		keyBuf = append(keyBuf, pair.Key...)
   488  		if val := b.Get(keyBuf[:kLen]); val == nil {
   489  			return false
   490  		}
   491  		keyBuf = keyBuf[:1]
   492  	}
   493  	return true
   494  }
   495  
   496  func appendShards(b *bolt.Bucket, shards []ucs.KeyValuePair, key []byte) error {
   497  	key[0] = eTypeShard
   498  	for _, pair := range shards {
   499  		kLen := len(pair.Key) + 1
   500  		key = append(key, pair.Key...)
   501  		if err := b.Put(key[:kLen], pair.Value); err != nil {
   502  			return err
   503  		}
   504  		key = key[:1]
   505  	}
   506  	return nil
   507  }
   508  
   509  func deleteShards(b *bolt.Bucket, shards []string, keyBuf []byte) error {
   510  	keyBuf[0] = eTypeShard
   511  	for _, name := range shards {
   512  		kLen := len(name) + 1
   513  		keyBuf = append(keyBuf, name...)
   514  		if err := b.Delete(keyBuf[:kLen]); err != nil {
   515  			return err
   516  		}
   517  		keyBuf = keyBuf[:1]
   518  	}
   519  	return nil
   520  }
   521  
   522  func encodeClassName(name string) []byte {
   523  	len := len(name) + 1
   524  	buf := make([]byte, 1, len)
   525  	buf[0] = eTypeClass
   526  	buf = append(buf, name...)
   527  	return buf[:len]
   528  }
   529  
   530  func copyFile(dst, src string) error {
   531  	data, err := os.ReadFile(src)
   532  	if err != nil {
   533  		return err
   534  	}
   535  	return os.WriteFile(dst, data, 0o644)
   536  }
   537  
   538  // var _ = schemauc.Repo(&Repo{})