github.com/weaviate/weaviate@v1.24.6/usecases/backup/backend.go

//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
//  CONTACT: hello@weaviate.io
//

package backup

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"os"
	"path"
	"runtime"
	"sync/atomic"
	"time"

	enterrors "github.com/weaviate/weaviate/entities/errors"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/sirupsen/logrus"
	"github.com/weaviate/weaviate/entities/backup"
	"github.com/weaviate/weaviate/entities/modulecapabilities"
	"github.com/weaviate/weaviate/usecases/monitoring"
)

// TODO adjust or make configurable
const (
	storeTimeout = 24 * time.Hour
	metaTimeout  = 20 * time.Minute

	// DefaultChunkSize is used if no chunk size is specified
	DefaultChunkSize = 1 << 27 // 128MB

	// maxChunkSize is the upper bound on the chunk size
	maxChunkSize = 1 << 29 // 512MB

	// minChunkSize is the lower bound on the chunk size
	minChunkSize = 1 << 21 // 2MB

	// maxCPUPercentage is the maximum CPU percentage that may be consumed by the file writer
	maxCPUPercentage = 80

	// DefaultCPUPercentage is the default CPU percentage that may be consumed by the file writer
	DefaultCPUPercentage = 50
)

const (
	// BackupFile is used by a node to store its metadata
	BackupFile = "backup.json"
	// GlobalBackupFile is used by the coordinator to store its metadata
	GlobalBackupFile  = "backup_config.json"
	GlobalRestoreFile = "restore_config.json"
	_TempDirectory    = ".backup.tmp"
)

var _NUMCPU = runtime.NumCPU()

type objStore struct {
	b        modulecapabilities.BackupBackend
	BasePath string
}

func (s *objStore) HomeDir() string {
	return s.b.HomeDir(s.BasePath)
}

func (s *objStore) WriteToFile(ctx context.Context, key, destPath string) error {
	return s.b.WriteToFile(ctx, s.BasePath, key, destPath)
}

// SourceDataPath returns the data path of all source files
func (s *objStore) SourceDataPath() string {
	return s.b.SourceDataPath()
}

func (s *objStore) Write(ctx context.Context, key string, r io.ReadCloser) (int64, error) {
	return s.b.Write(ctx, s.BasePath, key, r)
}

func (s *objStore) Read(ctx context.Context, key string, w io.WriteCloser) (int64, error) {
	return s.b.Read(ctx, s.BasePath, key, w)
}

func (s *objStore) Initialize(ctx context.Context) error {
	return s.b.Initialize(ctx, s.BasePath)
}

// putMeta marshals and uploads metadata
func (s *objStore) putMeta(ctx context.Context, key string, desc interface{}) error {
	bytes, err := json.Marshal(desc)
	if err != nil {
		return fmt.Errorf("marshal meta file %q: %w", key, err)
	}
	ctx, cancel := context.WithTimeout(ctx, metaTimeout)
	defer cancel()
	if err := s.b.PutObject(ctx, s.BasePath, key, bytes); err != nil {
		return fmt.Errorf("upload meta file %q: %w", key, err)
	}
	return nil
}

// meta downloads and unmarshals the metadata stored under key
func (s *objStore) meta(ctx context.Context, key string, dest interface{}) error {
	bytes, err := s.b.GetObject(ctx, s.BasePath, key)
	if err != nil {
		return err
	}
	err = json.Unmarshal(bytes, dest)
	if err != nil {
		return fmt.Errorf("unmarshal meta file %q: %w", key, err)
	}
	return nil
}

type nodeStore struct {
	objStore
}
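// exampleMetaRoundTrip is an illustrative sketch, not part of the original
// file: it shows how putMeta and meta are expected to round-trip a node
// descriptor through the backend under BasePath. The helper name and the idea
// of immediately reading the descriptor back are assumptions made for the example.
func exampleMetaRoundTrip(ctx context.Context, s *objStore, desc *backup.BackupDescriptor) (*backup.BackupDescriptor, error) {
	// upload the marshalled descriptor as backup.json
	if err := s.putMeta(ctx, BackupFile, desc); err != nil {
		return nil, err
	}
	// download and unmarshal it again
	var got backup.BackupDescriptor
	if err := s.meta(ctx, BackupFile, &got); err != nil {
		return nil, err
	}
	return &got, nil
}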
// Meta gets metadata using the standard path or the deprecated old path
//
// adjustBasePath: sets the base path to the old path if the backup has been created prior to v1.17.
func (s *nodeStore) Meta(ctx context.Context, backupID string, adjustBasePath bool) (*backup.BackupDescriptor, error) {
	var result backup.BackupDescriptor
	err := s.meta(ctx, BackupFile, &result)
	if err != nil {
		cs := &objStore{s.b, backupID} // for backward compatibility
		if err := cs.meta(ctx, BackupFile, &result); err == nil {
			if adjustBasePath {
				s.objStore.BasePath = backupID
			}
			return &result, nil
		}
	}

	return &result, err
}

// PutMeta marshals and uploads the node metadata
func (s *nodeStore) PutMeta(ctx context.Context, desc *backup.BackupDescriptor) error {
	return s.putMeta(ctx, BackupFile, desc)
}

type coordStore struct {
	objStore
}

// PutMeta puts the coordinator's global metadata into the object store
func (s *coordStore) PutMeta(ctx context.Context, filename string, desc *backup.DistributedBackupDescriptor) error {
	return s.putMeta(ctx, filename, desc)
}

// Meta gets the coordinator's global metadata from the object store
func (s *coordStore) Meta(ctx context.Context, filename string) (*backup.DistributedBackupDescriptor, error) {
	var result backup.DistributedBackupDescriptor
	err := s.meta(ctx, filename, &result)
	if err != nil && filename == GlobalBackupFile {
		var oldBackup backup.BackupDescriptor
		if err := s.meta(ctx, BackupFile, &oldBackup); err == nil {
			return oldBackup.ToDistributed(), nil
		}
	}
	return &result, err
}

// uploader uploads backup artifacts. This includes db files and metadata.
type uploader struct {
	sourcer  Sourcer
	backend  nodeStore
	backupID string
	zipConfig
	setStatus func(st backup.Status)
	log       logrus.FieldLogger
}

func newUploader(sourcer Sourcer, backend nodeStore,
	backupID string, setstatus func(st backup.Status), l logrus.FieldLogger,
) *uploader {
	return &uploader{
		sourcer, backend,
		backupID,
		newZipConfig(Compression{
			Level:         DefaultCompression,
			CPUPercentage: DefaultCPUPercentage,
			ChunkSize:     DefaultChunkSize,
		}),
		setstatus,
		l,
	}
}

func (u *uploader) withCompression(cfg zipConfig) *uploader {
	u.zipConfig = cfg
	return u
}

// all uploads all files in addition to the metadata file
func (u *uploader) all(ctx context.Context, classes []string, desc *backup.BackupDescriptor) (err error) {
	u.setStatus(backup.Transferring)
	desc.Status = string(backup.Transferring)
	ch := u.sourcer.BackupDescriptors(ctx, desc.ID, classes)
	defer func() {
		// make sure the context is not cancelled when uploading metadata
		ctx := context.Background()
		if err != nil {
			desc.Error = err.Error()
			err = fmt.Errorf("upload %w: %v", err, u.backend.PutMeta(ctx, desc))
		} else {
			u.log.Info("start uploading meta data")
			if err = u.backend.PutMeta(ctx, desc); err != nil {
				desc.Status = string(backup.Transferred)
			}
			u.setStatus(backup.Success)
			u.log.Info("finish uploading meta data")
		}
	}()
Loop:
	for {
		select {
		case cdesc, ok := <-ch:
			if !ok {
				break Loop // we are done
			}
			if cdesc.Error != nil {
				return cdesc.Error
			}
			u.log.WithField("class", cdesc.Name).Info("start uploading files")
			if err := u.class(ctx, desc.ID, &cdesc); err != nil {
				return err
			}
			desc.Classes = append(desc.Classes, cdesc)
			u.log.WithField("class", cdesc.Name).Info("finish uploading files")

		case <-ctx.Done():
			return ctx.Err()
		}
	}
	u.setStatus(backup.Transferred)
	desc.Status = string(backup.Success)
	return nil
}
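// exampleNewUploaderWithCompression is an illustrative sketch, not part of the
// original file: it shows how the defaults chosen by newUploader can be
// replaced via withCompression. The concrete Compression values are
// assumptions picked for the example.
func exampleNewUploaderWithCompression(sourcer Sourcer, backend nodeStore,
	backupID string, log logrus.FieldLogger,
) *uploader {
	u := newUploader(sourcer, backend, backupID, func(backup.Status) {}, log)
	return u.withCompression(newZipConfig(Compression{
		Level:         DefaultCompression, // compression level defined alongside zipConfig
		CPUPercentage: maxCPUPercentage,   // use the upper bound instead of the default 50%
		ChunkSize:     minChunkSize,       // smallest allowed chunk size (2MB)
	}))
}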
// class uploads one class
func (u *uploader) class(ctx context.Context, id string, desc *backup.ClassDescriptor) (err error) {
	classLabel := desc.Name
	if monitoring.GetMetrics().Group {
		classLabel = "n/a"
	}
	metric, err := monitoring.GetMetrics().BackupStoreDurations.GetMetricWithLabelValues(getType(u.backend.b), classLabel)
	if err == nil {
		timer := prometheus.NewTimer(metric)
		defer timer.ObserveDuration()
	}
	defer func() {
		// backups need to be released anyway
		enterrors.GoWrapper(func() { u.sourcer.ReleaseBackup(context.Background(), id, desc.Name) }, u.log)
	}()
	ctx, cancel := context.WithTimeout(ctx, storeTimeout)
	defer cancel()
	nShards := len(desc.Shards)
	if nShards == 0 {
		return nil
	}

	desc.Chunks = make(map[int32][]string, 1+nShards/2)
	var (
		hasJobs   atomic.Bool
		lastChunk = int32(0)
		nWorker   = u.GoPoolSize
	)
	if nWorker > nShards {
		nWorker = nShards
	}
	hasJobs.Store(nShards > 0)

	// jobs produces work for the processor
	jobs := func(xs []*backup.ShardDescriptor) <-chan *backup.ShardDescriptor {
		sendCh := make(chan *backup.ShardDescriptor)
		f := func() {
			defer close(sendCh)
			defer hasJobs.Store(false)

			for _, shard := range xs {
				select {
				case sendCh <- shard:
				// cancellation will happen for two reasons:
				//  - 1. if the whole operation has been aborted,
				//  - 2. or if the processor routine returns an error
				case <-ctx.Done():
					return
				}
			}
		}
		enterrors.GoWrapper(f, u.log)
		return sendCh
	}

	// processor
	processor := func(nWorker int, sender <-chan *backup.ShardDescriptor) <-chan chuckShards {
		eg, ctx := enterrors.NewErrorGroupWithContextWrapper(u.log, ctx)
		eg.SetLimit(nWorker)
		recvCh := make(chan chuckShards, nWorker)
		f := func() {
			defer close(recvCh)
			for i := 0; i < nWorker; i++ {
				eg.Go(func() error {
					// operation might have been aborted, see comment above
					if err := ctx.Err(); err != nil {
						return err
					}
					for hasJobs.Load() {
						chunk := atomic.AddInt32(&lastChunk, 1)
						shards, err := u.compress(ctx, desc.Name, chunk, sender)
						if err != nil {
							return err
						}
						if m := int32(len(shards)); m > 0 {
							recvCh <- chuckShards{chunk, shards}
						}
					}
					return err
				})
			}
			err = eg.Wait()
		}
		enterrors.GoWrapper(f, u.log)
		return recvCh
	}

	for x := range processor(nWorker, jobs(desc.Shards)) {
		desc.Chunks[x.chunk] = x.shards
	}
	return
}

type chuckShards struct {
	chunk  int32
	shards []string
}

func (u *uploader) compress(ctx context.Context,
	class string, // class name
	chunk int32, // chunk index
	ch <-chan *backup.ShardDescriptor, // chan of shards
) ([]string, error) {
	var (
		chunkKey = chunkKey(class, chunk)
		shards   = make([]string, 0, 10)
		// add tolerance to enable better optimization of the chunk size
		maxSize = int64(u.ChunkSize + u.ChunkSize/20) // size + 5%
	)
	zip, reader := NewZip(u.backend.SourceDataPath(), u.Level)
	producer := func() error {
		defer zip.Close()
		lastShardSize := int64(0)
		for shard := range ch {
			if _, err := zip.WriteShard(ctx, shard); err != nil {
				return err
			}
			shard.Chunk = chunk
			shards = append(shards, shard.Name)
			shard.ClearTemporary()

			zip.gzw.Flush() // flush new shard
			lastShardSize = zip.lastWritten() - lastShardSize
			if zip.lastWritten()+lastShardSize > maxSize {
				break
			}
		}
		return nil
	}

	// consumer
	eg := enterrors.NewErrorGroupWrapper(u.log)
	eg.Go(func() error {
		if _, err := u.backend.Write(ctx, chunkKey, reader); err != nil {
			return err
		}
		return nil
	})

	if err := producer(); err != nil {
		return shards, err
	}
	// wait for the consumer to finish
	return shards, eg.Wait()
}
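// exampleChunkKeys is an illustrative sketch, not part of the original file:
// it lists the object-store keys produced for a class whose shards were packed
// into n chunks, e.g. "Article/chunk-1" ... "Article/chunk-3" for n = 3.
// Chunk numbering starts at 1 because class() increments lastChunk before use.
func exampleChunkKeys(class string, n int32) []string {
	keys := make([]string, 0, n)
	for i := int32(1); i <= n; i++ {
		keys = append(keys, chunkKey(class, i))
	}
	return keys
}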
// fileWriter downloads files from the object store and writes them to the destination folder destDir
type fileWriter struct {
	sourcer    Sourcer
	backend    nodeStore
	tempDir    string
	destDir    string
	movedFiles []string // files successfully moved to destination folder
	compressed bool
	GoPoolSize int
	migrator   func(classPath string) error
	logger     logrus.FieldLogger
}

func newFileWriter(sourcer Sourcer, backend nodeStore,
	compressed bool, logger logrus.FieldLogger,
) *fileWriter {
	destDir := backend.SourceDataPath()
	return &fileWriter{
		sourcer:    sourcer,
		backend:    backend,
		destDir:    destDir,
		tempDir:    path.Join(destDir, _TempDirectory),
		movedFiles: make([]string, 0, 64),
		compressed: compressed,
		GoPoolSize: routinePoolSize(50),
		logger:     logger,
	}
}

func (fw *fileWriter) WithPoolPercentage(p int) *fileWriter {
	fw.GoPoolSize = routinePoolSize(p)
	return fw
}

func (fw *fileWriter) setMigrator(m func(classPath string) error) { fw.migrator = m }

// Write downloads files and puts them in the destination directory
func (fw *fileWriter) Write(ctx context.Context, desc *backup.ClassDescriptor) (rollback func() error, err error) {
	if len(desc.Shards) == 0 { // nothing to copy
		return func() error { return nil }, nil
	}
	classTempDir := path.Join(fw.tempDir, desc.Name)
	defer func() {
		if err != nil {
			if rerr := fw.rollBack(); rerr != nil {
				err = fmt.Errorf("%w: %v", err, rerr)
			}
		}
		os.RemoveAll(classTempDir)
	}()

	if err := fw.writeTempFiles(ctx, classTempDir, desc); err != nil {
		return nil, fmt.Errorf("get files: %w", err)
	}

	if fw.migrator != nil {
		if err := fw.migrator(classTempDir); err != nil {
			return nil, fmt.Errorf("migrate from pre 1.23: %w", err)
		}
	}

	if err := fw.moveAll(classTempDir); err != nil {
		return nil, fmt.Errorf("move files to destination: %w", err)
	}

	return func() error { return fw.rollBack() }, nil
}

// writeTempFiles writes class files into a temporary directory
// temporary directory path = fw.tempDir/className
// The function makes sure that created files will be removed in case of an error
func (fw *fileWriter) writeTempFiles(ctx context.Context, classTempDir string, desc *backup.ClassDescriptor) (err error) {
	if err := os.RemoveAll(classTempDir); err != nil {
		return fmt.Errorf("remove %s: %w", classTempDir, err)
	}
	if err := os.MkdirAll(classTempDir, os.ModePerm); err != nil {
		return fmt.Errorf("create temp class folder %s: %w", classTempDir, err)
	}
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	// if the backup is not compressed, files are processed as before (one download per file)
	eg, ctx := enterrors.NewErrorGroupWithContextWrapper(fw.logger, ctx)
	if !fw.compressed {
		eg.SetLimit(2 * _NUMCPU)
		for _, shard := range desc.Shards {
			shard := shard
			eg.Go(func() error { return fw.writeTempShard(ctx, shard, classTempDir) }, shard.Name)
		}
		return eg.Wait()
	}

	// source files are compressed

	eg.SetLimit(fw.GoPoolSize)
	for k := range desc.Chunks {
		chunk := chunkKey(desc.Name, k)
		eg.Go(func() error {
			uz, w := NewUnzip(classTempDir)
			enterrors.GoWrapper(func() {
				fw.backend.Read(ctx, chunk, w)
			}, fw.logger)
			_, err := uz.ReadChunk()
			return err
		})
	}
	return eg.Wait()
}
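// exampleRestoreClass is an illustrative sketch, not part of the original
// file: it shows the Write/rollback contract of fileWriter from a hypothetical
// caller's perspective. If anything fails after the files have been moved into
// the data directory, the returned rollback removes them again. The finish
// argument stands in for any follow-up step of that hypothetical caller.
func exampleRestoreClass(ctx context.Context, fw *fileWriter, desc *backup.ClassDescriptor, finish func() error) error {
	rollback, err := fw.Write(ctx, desc)
	if err != nil {
		return fmt.Errorf("restore class %q: %w", desc.Name, err)
	}
	if err := finish(); err != nil {
		// undo the files that Write already moved into the destination folder
		if rerr := rollback(); rerr != nil {
			return fmt.Errorf("%w: rollback: %v", err, rerr)
		}
		return err
	}
	return nil
}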
func (fw *fileWriter) writeTempShard(ctx context.Context, sd *backup.ShardDescriptor, classTempDir string) error {
	for _, key := range sd.Files {
		destPath := path.Join(classTempDir, key)
		destDir := path.Dir(destPath)
		if err := os.MkdirAll(destDir, os.ModePerm); err != nil {
			return fmt.Errorf("create folder %s: %w", destDir, err)
		}
		if err := fw.backend.WriteToFile(ctx, key, destPath); err != nil {
			return fmt.Errorf("write file %s: %w", destPath, err)
		}
	}
	destPath := path.Join(classTempDir, sd.DocIDCounterPath)
	if err := os.WriteFile(destPath, sd.DocIDCounter, os.ModePerm); err != nil {
		return fmt.Errorf("write counter file %s: %w", destPath, err)
	}
	destPath = path.Join(classTempDir, sd.PropLengthTrackerPath)
	if err := os.WriteFile(destPath, sd.PropLengthTracker, os.ModePerm); err != nil {
		return fmt.Errorf("write prop file %s: %w", destPath, err)
	}
	destPath = path.Join(classTempDir, sd.ShardVersionPath)
	if err := os.WriteFile(destPath, sd.Version, os.ModePerm); err != nil {
		return fmt.Errorf("write version file %s: %w", destPath, err)
	}
	return nil
}

// moveAll moves all files to the destination
func (fw *fileWriter) moveAll(classTempDir string) (err error) {
	files, err := os.ReadDir(classTempDir)
	if err != nil {
		return fmt.Errorf("read %s: %w", classTempDir, err)
	}
	destDir := fw.destDir
	for _, key := range files {
		from := path.Join(classTempDir, key.Name())
		to := path.Join(destDir, key.Name())
		if err := os.Rename(from, to); err != nil {
			return fmt.Errorf("move %s %s: %w", from, to, err)
		}
		fw.movedFiles = append(fw.movedFiles, to)
	}

	return nil
}

// rollBack removes files that were successfully moved to the destination folder
func (fw *fileWriter) rollBack() (err error) {
	// rollback successfully moved files
	for _, fpath := range fw.movedFiles {
		if rerr := os.RemoveAll(fpath); rerr != nil && err == nil {
			err = fmt.Errorf("rollback %s: %w", fpath, rerr)
		}
	}
	return err
}

func chunkKey(class string, id int32) string {
	return fmt.Sprintf("%s/chunk-%d", class, id)
}

func routinePoolSize(percentage int) int {
	if percentage == 0 { // default value
		percentage = DefaultCPUPercentage
	} else if percentage > maxCPUPercentage {
		percentage = maxCPUPercentage
	}
	if x := (_NUMCPU * percentage) / 100; x > 0 {
		return x
	}
	return 1
}
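// exampleRoutinePoolSizes is an illustrative sketch, not part of the original
// file: on a hypothetical 8-core machine routinePoolSize(0) falls back to
// DefaultCPUPercentage and yields 4 workers, routinePoolSize(50) also yields 4,
// and routinePoolSize(100) is capped at maxCPUPercentage (80%) and yields 6.
func exampleRoutinePoolSizes() map[int]int {
	return map[int]int{
		0:   routinePoolSize(0),   // default percentage
		50:  routinePoolSize(50),  // half of the available CPUs
		100: routinePoolSize(100), // capped at maxCPUPercentage
	}
}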