github.com/weaviate/weaviate@v1.24.6/adapters/repos/schema/store.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package schema 13 14 import ( 15 "bytes" 16 "context" 17 "encoding/json" 18 "fmt" 19 "os" 20 "path" 21 22 enterrors "github.com/weaviate/weaviate/entities/errors" 23 24 "github.com/pkg/errors" 25 "github.com/sirupsen/logrus" 26 "github.com/weaviate/weaviate/entities/models" 27 ucs "github.com/weaviate/weaviate/usecases/schema" 28 "github.com/weaviate/weaviate/usecases/sharding" 29 bolt "go.etcd.io/bbolt" 30 ) 31 32 var ( 33 // old keys are still needed for migration 34 schemaBucket = []byte("schema") 35 schemaKey = []byte("schema") 36 // static keys 37 keyMetaClass = []byte{eTypeMeta, 0} 38 keyShardingState = []byte{eTypeSharingState, 0} 39 keyConfig = []byte{eTypeConfig, 0} 40 _Version int = 2 41 ) 42 43 // constant to encode the type of entry in the DB 44 const ( 45 eTypeConfig byte = 1 46 eTypeClass byte = 2 47 eTypeShard byte = 4 48 eTypeMeta byte = 5 49 eTypeSharingState byte = 15 50 ) 51 52 // config configuration specific the stored schema 53 type config struct { 54 Version int 55 // add more fields 56 } 57 58 /* 59 Store is responsible for storing and persisting the schema in a structured manner. 60 It ensures that each class has a dedicated bucket, which includes metadata, and sharding state. 61 62 Schema Structure: 63 - Config: contains metadata related to parsing the schema 64 - Nested buckets for each class 65 66 Schema Structure for a class Bucket: 67 - Metadata contains models.Class 68 - Sharding state without shards 69 - Class shards: individual shard associated with the sharding state 70 71 By organizing the schema in this manner, it facilitates efficient management of class specific data during runtime. 72 In addition, old schema are backed up and migrated to the new structure for a seamless transitions 73 */ 74 type store struct { 75 version int // schema version 76 homeDir string // home directory of schema files 77 log logrus.FieldLogger 78 db *bolt.DB 79 } 80 81 // NewStore returns a new schema repository. Call the Open() method to open the underlying DB. 82 // To free the resources, call the Close() method. 83 func NewStore(homeDir string, logger logrus.FieldLogger) *store { 84 return &store{ 85 version: _Version, 86 homeDir: homeDir, 87 log: logger, 88 } 89 } 90 91 func initBoltDB(filePath string, version int, cfg *config) (*bolt.DB, error) { 92 db, err := bolt.Open(filePath, 0o600, nil) 93 if err != nil { 94 return nil, fmt.Errorf("open %q: %w", filePath, err) 95 } 96 root := func(tx *bolt.Tx) error { 97 b, err := tx.CreateBucket(schemaBucket) 98 // A new bucket has been created 99 if err == nil { 100 *cfg = config{Version: version} 101 return saveConfig(b, *cfg) 102 } 103 // load existing bucket 104 b = tx.Bucket(schemaBucket) 105 if b == nil { 106 return fmt.Errorf("retrieve existing bucket %q", schemaBucket) 107 } 108 // read config: config exists since version 2 109 data := b.Get(keyConfig) 110 if len(data) > 0 { 111 if err := json.Unmarshal(data, &cfg); err != nil { 112 return fmt.Errorf("cannot read config: %w", err) 113 } 114 } 115 return nil 116 } 117 118 return db, db.Update(root) 119 } 120 121 // Open the underlying DB 122 func (r *store) Open() (err error) { 123 if err := os.MkdirAll(r.homeDir, 0o777); err != nil { 124 return fmt.Errorf("create root directory %q: %w", r.homeDir, err) 125 } 126 cfg := config{} 127 path := path.Join(r.homeDir, "schema.db") 128 boltDB, err := initBoltDB(path, r.version, &cfg) 129 if err != nil { 130 return fmt.Errorf("init bolt_db: %w", err) 131 } 132 defer func() { 133 if err != nil { 134 boltDB.Close() 135 } 136 }() 137 r.db = boltDB 138 if cfg.Version < r.version { 139 if err := r.migrate(path, cfg.Version, r.version); err != nil { 140 return fmt.Errorf("migrate: %w", err) 141 } 142 } 143 if cfg.Version > r.version { 144 return fmt.Errorf("schema version %d higher than %d", cfg.Version, r.version) 145 } 146 return err 147 } 148 149 // Close the underlying DB 150 func (r *store) Close() { 151 r.db.Close() 152 } 153 154 // migrate from old to new schema 155 // It will back up the old schema file if it exists 156 func (r *store) migrate(filePath string, from, to int) (err error) { 157 r.log.Infof("schema migration from v%d to v%d process has started", from, to) 158 defer func() { 159 if err == nil { 160 r.log.Infof("successfully completed schema migration from v%d to v%d", from, to) 161 } 162 }() 163 state, err := r.loadSchemaV1() 164 if err != nil { 165 return fmt.Errorf("load old schema: %w", err) 166 } 167 if state != nil { 168 // create backupPath by copying file 169 backupPath := fmt.Sprintf("%s_v%d.bak", filePath, from) 170 if err := copyFile(backupPath, filePath); err != nil { 171 return fmt.Errorf("schema backup: %w", err) 172 } 173 174 // write new schema 175 f := func(tx *bolt.Tx) error { 176 b := tx.Bucket(schemaBucket) 177 if err := saveConfig(b, config{Version: to}); err != nil { 178 return err 179 } 180 b.Delete(schemaKey) // remove old schema 181 return r.saveAllTx(context.Background(), b, *state)(tx) 182 } 183 if err := r.db.Update(f); err != nil { 184 os.Remove(backupPath) 185 return fmt.Errorf("convert to new schema: %w", err) 186 } 187 } 188 return nil 189 } 190 191 // saveSchemaV1 might be needed to migrate from v2 to v0 192 func (r *store) saveSchemaV1(schema ucs.State) error { 193 schemaJSON, err := json.Marshal(schema) 194 if err != nil { 195 return errors.Wrapf(err, "marshal schema state to json") 196 } 197 198 return r.db.Update(func(tx *bolt.Tx) error { 199 b := tx.Bucket(schemaBucket) 200 return b.Put(schemaKey, schemaJSON) 201 }) 202 } 203 204 // loadSchemaV1 is needed to migrate from v0 to v2 205 func (r *store) loadSchemaV1() (*ucs.State, error) { 206 var schemaJSON []byte 207 r.db.View(func(tx *bolt.Tx) error { 208 b := tx.Bucket(schemaBucket) 209 schemaJSON = b.Get(schemaKey) 210 return nil 211 }) 212 213 if len(schemaJSON) == 0 { 214 return nil, nil 215 } 216 217 var state ucs.State 218 err := json.Unmarshal(schemaJSON, &state) 219 if err != nil { 220 return nil, errors.Wrapf(err, "parse schema state from JSON") 221 } 222 223 return &state, nil 224 } 225 226 // UpdateClass if it exists, otherwise return an error. 227 func (r *store) UpdateClass(_ context.Context, data ucs.ClassPayload) error { 228 classKey := encodeClassName(data.Name) 229 f := func(tx *bolt.Tx) error { 230 b := tx.Bucket(schemaBucket).Bucket(classKey) 231 if b == nil { 232 return fmt.Errorf("class not found") 233 } 234 return r.updateClass(b, data) 235 } 236 return r.db.Update(f) 237 } 238 239 // NewClass creates a new class if it doesn't exists, otherwise return an error 240 func (r *store) NewClass(_ context.Context, data ucs.ClassPayload) error { 241 classKey := encodeClassName(data.Name) 242 f := func(tx *bolt.Tx) error { 243 b, err := tx.Bucket(schemaBucket).CreateBucket(classKey) 244 if err != nil { 245 return err 246 } 247 return r.updateClass(b, data) 248 } 249 return r.db.Update(f) 250 } 251 252 func (r *store) updateClass(b *bolt.Bucket, data ucs.ClassPayload) error { 253 // remove old shards 254 if data.ReplaceShards { 255 cursor := b.Cursor() // b.Put before 256 for key, _ := cursor.First(); key != nil; { 257 if key[0] == eTypeShard { 258 b.Delete(key) 259 } 260 key, _ = cursor.Next() 261 } 262 } 263 if data.Metadata != nil { 264 if err := b.Put(keyMetaClass, data.Metadata); err != nil { 265 return err 266 } 267 } 268 269 if data.ShardingState != nil { 270 if err := b.Put(keyShardingState, data.ShardingState); err != nil { 271 return err 272 } 273 } 274 275 return appendShards(b, data.Shards, make([]byte, 1, 68)) 276 } 277 278 // DeleteClass class 279 func (r *store) DeleteClass(_ context.Context, class string) error { 280 classKey := encodeClassName(class) 281 f := func(tx *bolt.Tx) error { 282 err := tx.Bucket(schemaBucket).DeleteBucket(classKey) 283 if err != nil && !errors.Is(err, bolt.ErrBucketNotFound) { 284 return err 285 } 286 return nil 287 } 288 return r.db.Update(f) 289 } 290 291 // NewShards add new shards to an existing class 292 func (r *store) NewShards(_ context.Context, class string, shards []ucs.KeyValuePair) error { 293 classKey := encodeClassName(class) 294 f := func(tx *bolt.Tx) error { 295 b := tx.Bucket(schemaBucket).Bucket(classKey) 296 if b == nil { 297 return fmt.Errorf("class not found") 298 } 299 return appendShards(b, shards, make([]byte, 1, 68)) 300 } 301 return r.db.Update(f) 302 } 303 304 // Update shards updates (replaces) shards of existing class 305 // Error is returned if class or shard does not exist 306 func (r *store) UpdateShards(_ context.Context, class string, shards []ucs.KeyValuePair) error { 307 classKey := encodeClassName(class) 308 f := func(tx *bolt.Tx) error { 309 b := tx.Bucket(schemaBucket).Bucket(classKey) 310 if b == nil { 311 return fmt.Errorf("class not found") 312 } 313 keyBuf := make([]byte, 1, 68) 314 if !existShards(b, shards, keyBuf) { 315 return fmt.Errorf("shard not found") 316 } 317 return appendShards(b, shards, keyBuf) 318 } 319 return r.db.Update(f) 320 } 321 322 // DeleteShards of a specific class 323 // 324 // If the class or a shard does not exist then nothing is done and a nil error is returned 325 func (r *store) DeleteShards(_ context.Context, class string, shards []string) error { 326 classKey := encodeClassName(class) 327 f := func(tx *bolt.Tx) error { 328 b := tx.Bucket(schemaBucket).Bucket(classKey) 329 if b == nil { 330 return nil 331 } 332 return deleteShards(b, shards, make([]byte, 1, 68)) 333 } 334 return r.db.Update(f) 335 } 336 337 // Load loads the complete schema from the persistent storage 338 func (r *store) Load(ctx context.Context) (ucs.State, error) { 339 state := ucs.NewState(32) 340 for data := range r.load(ctx) { 341 if data.Error != nil { 342 return state, data.Error 343 } 344 cls := models.Class{Class: string(data.Name)} 345 ss := sharding.State{} 346 347 if err := json.Unmarshal(data.Metadata, &cls); err != nil { 348 return state, fmt.Errorf("unmarshal class %q", cls.Class) 349 } 350 if err := json.Unmarshal(data.ShardingState, &ss); err != nil { 351 return state, fmt.Errorf("unmarshal sharding state for class %q size %d", 352 cls.Class, len(data.ShardingState)) 353 } 354 if n := len(data.Shards); n > 0 { 355 ss.Physical = make(map[string]sharding.Physical, n) 356 } 357 for _, shard := range data.Shards { 358 phy := sharding.Physical{} 359 name := string(shard.Key) 360 if err := json.Unmarshal(shard.Value, &phy); err != nil { 361 return state, fmt.Errorf("unmarshal shard %q for class %q", name, cls.Class) 362 } 363 ss.Physical[name] = phy 364 } 365 state.ObjectSchema.Classes = append(state.ObjectSchema.Classes, &cls) 366 state.ShardingState[cls.Class] = &ss 367 } 368 return state, nil 369 } 370 371 func (r *store) load(ctx context.Context) <-chan ucs.ClassPayload { 372 ch := make(chan ucs.ClassPayload, 1) 373 f := func(tx *bolt.Tx) (err error) { 374 root := tx.Bucket(schemaBucket) 375 rootCursor := root.Cursor() 376 for cls, _ := rootCursor.First(); cls != nil; { 377 if cls[0] != eTypeClass { 378 cls, _ = rootCursor.Next() 379 continue 380 } 381 if err := ctx.Err(); err != nil { 382 ch <- ucs.ClassPayload{Error: err} 383 return err 384 } 385 b := root.Bucket(cls) 386 if b == nil { 387 err := fmt.Errorf("class not found") 388 ch <- ucs.ClassPayload{Error: err} 389 return err 390 } 391 x := ucs.ClassPayload{ 392 Name: string(cls[1:]), 393 Shards: make([]ucs.KeyValuePair, 0, 32), 394 } 395 cursor := b.Cursor() 396 for key, value := cursor.First(); key != nil; { 397 if bytes.Equal(key, keyMetaClass) { 398 x.Metadata = value 399 } else if bytes.Equal(key, keyShardingState) { 400 x.ShardingState = value 401 } else { 402 x.Shards = append(x.Shards, ucs.KeyValuePair{Key: string(key[1:]), Value: value}) 403 } 404 key, value = cursor.Next() 405 } 406 ch <- x 407 cls, _ = rootCursor.Next() 408 } 409 return nil 410 } 411 enterrors.GoWrapper(func() { 412 defer close(ch) 413 r.db.View(f) 414 }, r.log) 415 return ch 416 } 417 418 // Save saves the complete schema to the persistent storage 419 func (r *store) Save(ctx context.Context, ss ucs.State) error { 420 if (ss.ObjectSchema == nil || len(ss.ObjectSchema.Classes) == 0) && 421 len(ss.ShardingState) == 0 { 422 return nil // empty schema nothing to store 423 } 424 425 if ss.ObjectSchema == nil || 426 len(ss.ObjectSchema.Classes) == 0 || 427 len(ss.ShardingState) == 0 { 428 return fmt.Errorf("inconsistent schema: missing required fields") 429 } 430 431 f := func(tx *bolt.Tx) error { 432 root := tx.Bucket(schemaBucket) 433 return r.saveAllTx(ctx, root, ss)(tx) 434 } 435 return r.db.Update(f) 436 } 437 438 func (r *store) saveAllTx(ctx context.Context, root *bolt.Bucket, ss ucs.State) func(tx *bolt.Tx) error { 439 return func(tx *bolt.Tx) error { 440 rootCursor := root.Cursor() 441 for cls, _ := rootCursor.First(); cls != nil; { 442 if cls[0] == eTypeClass { 443 err := root.DeleteBucket(cls) 444 if err != nil && !errors.Is(err, bolt.ErrBucketNotFound) { 445 return err 446 } 447 } 448 cls, _ = rootCursor.Next() 449 } 450 for _, cls := range ss.ObjectSchema.Classes { 451 if err := ctx.Err(); err != nil { 452 return fmt.Errorf("context for class %q: %w", cls.Class, err) 453 } 454 sharding := ss.ShardingState[cls.Class] 455 payload, err := ucs.CreateClassPayload(cls, sharding) 456 if err != nil { 457 return fmt.Errorf("create payload for class %q: %w", cls.Class, err) 458 } 459 b, err := root.CreateBucket(encodeClassName(cls.Class)) 460 if err != nil { 461 return fmt.Errorf("create bucket for class %q: %w", cls.Class, err) 462 } 463 if err := r.updateClass(b, payload); err != nil { 464 return fmt.Errorf("update bucket %q: %w", cls.Class, err) 465 } 466 } 467 468 return nil 469 } 470 } 471 472 func saveConfig(root *bolt.Bucket, cfg config) error { 473 data, err := json.Marshal(&cfg) 474 if err != nil { 475 return fmt.Errorf("marshal config: %w", err) 476 } 477 if err := root.Put(keyConfig, data); err != nil { 478 return fmt.Errorf("write config: %w", err) 479 } 480 return nil 481 } 482 483 func existShards(b *bolt.Bucket, shards []ucs.KeyValuePair, keyBuf []byte) bool { 484 keyBuf[0] = eTypeShard 485 for _, pair := range shards { 486 kLen := len(pair.Key) + 1 487 keyBuf = append(keyBuf, pair.Key...) 488 if val := b.Get(keyBuf[:kLen]); val == nil { 489 return false 490 } 491 keyBuf = keyBuf[:1] 492 } 493 return true 494 } 495 496 func appendShards(b *bolt.Bucket, shards []ucs.KeyValuePair, key []byte) error { 497 key[0] = eTypeShard 498 for _, pair := range shards { 499 kLen := len(pair.Key) + 1 500 key = append(key, pair.Key...) 501 if err := b.Put(key[:kLen], pair.Value); err != nil { 502 return err 503 } 504 key = key[:1] 505 } 506 return nil 507 } 508 509 func deleteShards(b *bolt.Bucket, shards []string, keyBuf []byte) error { 510 keyBuf[0] = eTypeShard 511 for _, name := range shards { 512 kLen := len(name) + 1 513 keyBuf = append(keyBuf, name...) 514 if err := b.Delete(keyBuf[:kLen]); err != nil { 515 return err 516 } 517 keyBuf = keyBuf[:1] 518 } 519 return nil 520 } 521 522 func encodeClassName(name string) []byte { 523 len := len(name) + 1 524 buf := make([]byte, 1, len) 525 buf[0] = eTypeClass 526 buf = append(buf, name...) 527 return buf[:len] 528 } 529 530 func copyFile(dst, src string) error { 531 data, err := os.ReadFile(src) 532 if err != nil { 533 return err 534 } 535 return os.WriteFile(dst, data, 0o644) 536 } 537 538 // var _ = schemauc.Repo(&Repo{})