github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/backup.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package db 13 14 import ( 15 "context" 16 "fmt" 17 "sync" 18 "time" 19 20 enterrors "github.com/weaviate/weaviate/entities/errors" 21 22 "github.com/pkg/errors" 23 "github.com/sirupsen/logrus" 24 "github.com/weaviate/weaviate/entities/backup" 25 "github.com/weaviate/weaviate/entities/schema" 26 ) 27 28 type BackupState struct { 29 BackupID string 30 InProgress bool 31 } 32 33 // Backupable returns whether all given class can be backed up. 34 func (db *DB) Backupable(ctx context.Context, classes []string) error { 35 for _, c := range classes { 36 className := schema.ClassName(c) 37 idx := db.GetIndex(className) 38 if idx == nil || idx.Config.ClassName != className { 39 return fmt.Errorf("class %v doesn't exist", c) 40 } 41 } 42 return nil 43 } 44 45 // ListBackupable returns a list of all classes which can be backed up. 46 func (db *DB) ListBackupable() []string { 47 cs := make([]string, 0, len(db.indices)) 48 db.indexLock.RLock() 49 defer db.indexLock.RUnlock() 50 for _, idx := range db.indices { 51 cls := string(idx.Config.ClassName) 52 cs = append(cs, cls) 53 } 54 return cs 55 } 56 57 // BackupDescriptors returns a channel of class descriptors. 58 // Class descriptor records everything needed to restore a class 59 // If an error happens a descriptor with an error will be written to the channel just before closing it. 60 func (db *DB) BackupDescriptors(ctx context.Context, bakid string, classes []string, 61 ) <-chan backup.ClassDescriptor { 62 ds := make(chan backup.ClassDescriptor, len(classes)) 63 f := func() { 64 for _, c := range classes { 65 desc := backup.ClassDescriptor{Name: c} 66 idx := db.GetIndex(schema.ClassName(c)) 67 if idx == nil { 68 desc.Error = fmt.Errorf("class %v doesn't exist any more", c) 69 } else if err := idx.descriptor(ctx, bakid, &desc); err != nil { 70 desc.Error = fmt.Errorf("backup class %v descriptor: %w", c, err) 71 } 72 ds <- desc 73 if desc.Error != nil { 74 break 75 } 76 } 77 close(ds) 78 } 79 enterrors.GoWrapper(f, db.logger) 80 return ds 81 } 82 83 func (db *DB) ShardsBackup( 84 ctx context.Context, bakID, class string, shards []string, 85 ) (_ backup.ClassDescriptor, err error) { 86 cd := backup.ClassDescriptor{Name: class} 87 idx := db.GetIndex(schema.ClassName(class)) 88 if idx == nil { 89 return cd, fmt.Errorf("no index for class %q", class) 90 } 91 92 if err := idx.initBackup(bakID); err != nil { 93 return cd, fmt.Errorf("init backup state for class %q: %w", class, err) 94 } 95 96 defer func() { 97 if err != nil { 98 enterrors.GoWrapper(func() { idx.ReleaseBackup(ctx, bakID) }, db.logger) 99 } 100 }() 101 102 sm := make(map[string]ShardLike, len(shards)) 103 for _, shardName := range shards { 104 shard := idx.shards.Load(shardName) 105 if shard == nil { 106 return cd, fmt.Errorf("no shard %q for class %q", shardName, class) 107 } 108 sm[shardName] = shard 109 } 110 111 // prevent writing into the index during collection of metadata 112 idx.backupMutex.Lock() 113 defer idx.backupMutex.Unlock() 114 for shardName, shard := range sm { 115 if err := shard.BeginBackup(ctx); err != nil { 116 return cd, fmt.Errorf("class %q: shard %q: begin backup: %w", class, shardName, err) 117 } 118 119 sd := backup.ShardDescriptor{Name: shardName} 120 if err := shard.ListBackupFiles(ctx, &sd); err != nil { 121 return cd, fmt.Errorf("class %q: shard %q: list backup files: %w", class, shardName, err) 122 } 123 124 cd.Shards = append(cd.Shards, &sd) 125 } 126 127 return cd, nil 128 } 129 130 // ReleaseBackup release resources acquired by the index during backup 131 func (db *DB) ReleaseBackup(ctx context.Context, bakID, class string) (err error) { 132 fields := logrus.Fields{ 133 "op": "release_backup", 134 "class": class, 135 "id": bakID, 136 } 137 db.logger.WithFields(fields).Debug("starting") 138 begin := time.Now() 139 defer func() { 140 l := db.logger.WithFields(fields).WithField("took", time.Since(begin)) 141 if err != nil { 142 l.Error(err) 143 return 144 } 145 l.Debug("finish") 146 }() 147 148 idx := db.GetIndex(schema.ClassName(class)) 149 if idx != nil { 150 return idx.ReleaseBackup(ctx, bakID) 151 } 152 return nil 153 } 154 155 func (db *DB) ClassExists(name string) bool { 156 return db.IndexExists(schema.ClassName(name)) 157 } 158 159 // Returns the list of nodes where shards of class are contained. 160 // If there are no shards for the class, returns an empty list 161 // If there are shards for the class but no nodes are found, return an error 162 func (db *DB) Shards(ctx context.Context, class string) ([]string, error) { 163 unique := make(map[string]struct{}) 164 165 ss := db.schemaGetter.CopyShardingState(class) 166 if len(ss.Physical) == 0 { 167 return []string{}, nil 168 } 169 170 for _, shard := range ss.Physical { 171 for _, node := range shard.BelongsToNodes { 172 unique[node] = struct{}{} 173 } 174 } 175 176 var ( 177 nodes = make([]string, len(unique)) 178 counter = 0 179 ) 180 181 for node := range unique { 182 nodes[counter] = node 183 counter++ 184 } 185 if len(nodes) == 0 { 186 return nil, fmt.Errorf("found %v shards, but has 0 nodes", len(ss.Physical)) 187 } 188 189 return nodes, nil 190 } 191 192 func (db *DB) ListClasses(ctx context.Context) []string { 193 classes := db.schemaGetter.GetSchemaSkipAuth().Objects.Classes 194 classNames := make([]string, len(classes)) 195 196 for i, class := range classes { 197 classNames[i] = class.Class 198 } 199 200 return classNames 201 } 202 203 // descriptor record everything needed to restore a class 204 func (i *Index) descriptor(ctx context.Context, backupID string, desc *backup.ClassDescriptor) (err error) { 205 if err := i.initBackup(backupID); err != nil { 206 return err 207 } 208 defer func() { 209 if err != nil { 210 enterrors.GoWrapper(func() { i.ReleaseBackup(ctx, backupID) }, i.logger) 211 } 212 }() 213 // prevent writing into the index during collection of metadata 214 i.backupMutex.Lock() 215 defer i.backupMutex.Unlock() 216 217 if err = i.ForEachShard(func(name string, s ShardLike) error { 218 if err = s.BeginBackup(ctx); err != nil { 219 return fmt.Errorf("pause compaction and flush: %w", err) 220 } 221 var sd backup.ShardDescriptor 222 if err := s.ListBackupFiles(ctx, &sd); err != nil { 223 return fmt.Errorf("list shard %v files: %w", s.Name(), err) 224 } 225 226 desc.Shards = append(desc.Shards, &sd) 227 return nil 228 }); err != nil { 229 return err 230 } 231 232 if desc.ShardingState, err = i.marshalShardingState(); err != nil { 233 return fmt.Errorf("marshal sharding state %w", err) 234 } 235 if desc.Schema, err = i.marshalSchema(); err != nil { 236 return fmt.Errorf("marshal schema %w", err) 237 } 238 return ctx.Err() 239 } 240 241 // ReleaseBackup marks the specified backup as inactive and restarts all 242 // async background and maintenance processes. It errors if the backup does not exist 243 // or is already inactive. 244 func (i *Index) ReleaseBackup(ctx context.Context, id string) error { 245 i.logger.WithField("backup_id", id).WithField("class", i.Config.ClassName).Info("release backup") 246 defer i.resetBackupState() 247 if err := i.resumeMaintenanceCycles(ctx); err != nil { 248 return err 249 } 250 return nil 251 } 252 253 func (i *Index) initBackup(id string) error { 254 new := &BackupState{ 255 BackupID: id, 256 InProgress: true, 257 } 258 if !i.lastBackup.CompareAndSwap(nil, new) { 259 bid := "" 260 if x := i.lastBackup.Load(); x != nil { 261 bid = x.BackupID 262 } 263 return errors.Errorf( 264 "cannot create new backup, backup ‘%s’ is not yet released, this "+ 265 "means its contents have not yet been fully copied to its destination, "+ 266 "try again later", bid) 267 } 268 269 return nil 270 } 271 272 func (i *Index) resetBackupState() { 273 i.lastBackup.Store(nil) 274 } 275 276 func (i *Index) resumeMaintenanceCycles(ctx context.Context) (lastErr error) { 277 i.ForEachShard(func(name string, shard ShardLike) error { 278 if err := shard.resumeMaintenanceCycles(ctx); err != nil { 279 lastErr = err 280 i.logger.WithField("shard", name).WithField("op", "resume_maintenance").Error(err) 281 } 282 time.Sleep(time.Millisecond * 10) 283 return nil 284 }) 285 return lastErr 286 } 287 288 func (i *Index) marshalShardingState() ([]byte, error) { 289 b, err := i.getSchema.CopyShardingState(i.Config.ClassName.String()).JSON() 290 if err != nil { 291 return nil, errors.Wrap(err, "marshal sharding state") 292 } 293 294 return b, nil 295 } 296 297 func (i *Index) marshalSchema() ([]byte, error) { 298 schema := i.getSchema.GetSchemaSkipAuth() 299 300 b, err := schema.GetClass(i.Config.ClassName).MarshalBinary() 301 if err != nil { 302 return nil, errors.Wrap(err, "marshal schema") 303 } 304 305 return b, err 306 } 307 308 const ( 309 mutexRetryDuration = time.Millisecond * 500 310 mutexNotifyDuration = 20 * time.Second 311 ) 312 313 // backupMutex is an adapter built around rwmutex that facilitates cooperative blocking between write and read locks 314 type backupMutex struct { 315 sync.RWMutex 316 log logrus.FieldLogger 317 retryDuration time.Duration 318 notifyDuration time.Duration 319 } 320 321 // LockWithContext attempts to acquire a write lock while respecting the provided context. 322 // It reports whether the lock acquisition was successful or if the context has been cancelled. 323 func (m *backupMutex) LockWithContext(ctx context.Context) error { 324 return m.lock(ctx, m.TryLock) 325 } 326 327 func (m *backupMutex) lock(ctx context.Context, tryLock func() bool) error { 328 if tryLock() { 329 return nil 330 } 331 curTime := time.Now() 332 t := time.NewTicker(m.retryDuration) 333 defer t.Stop() 334 for { 335 select { 336 case <-ctx.Done(): 337 return ctx.Err() 338 case <-t.C: 339 if tryLock() { 340 return nil 341 } 342 if time.Since(curTime) > m.notifyDuration { 343 curTime = time.Now() 344 m.log.Info("backup process waiting for ongoing writes to finish") 345 } 346 } 347 } 348 } 349 350 func (s *backupMutex) RLockGuard(reader func() error) error { 351 s.RLock() 352 defer s.RUnlock() 353 return reader() 354 }