github.com/weaviate/weaviate@v1.24.6/usecases/backup/restorer.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package backup 13 14 import ( 15 "context" 16 "encoding/json" 17 "errors" 18 "fmt" 19 "reflect" 20 "sync" 21 "time" 22 23 enterrors "github.com/weaviate/weaviate/entities/errors" 24 25 "github.com/prometheus/client_golang/prometheus" 26 "github.com/sirupsen/logrus" 27 "github.com/weaviate/weaviate/entities/backup" 28 "github.com/weaviate/weaviate/entities/models" 29 "github.com/weaviate/weaviate/entities/schema" 30 "github.com/weaviate/weaviate/usecases/monitoring" 31 migratefs "github.com/weaviate/weaviate/usecases/schema/migrate/fs" 32 "github.com/weaviate/weaviate/usecases/sharding" 33 ) 34 35 type restorer struct { 36 node string // node name 37 logger logrus.FieldLogger 38 sourcer Sourcer 39 backends BackupBackendProvider 40 schema schemaManger 41 shardSyncChan 42 43 // TODO: keeping status in memory after restore has been done 44 // is not a proper solution for communicating status to the user. 45 // On app crash or restart this data will be lost 46 // This should be regarded as workaround and should be fixed asap 47 restoreStatusMap sync.Map 48 } 49 50 func newRestorer(node string, logger logrus.FieldLogger, 51 sourcer Sourcer, 52 backends BackupBackendProvider, 53 schema schemaManger, 54 ) *restorer { 55 return &restorer{ 56 node: node, 57 logger: logger, 58 sourcer: sourcer, 59 backends: backends, 60 schema: schema, 61 shardSyncChan: shardSyncChan{coordChan: make(chan interface{}, 5)}, 62 } 63 } 64 65 func (r *restorer) restore(ctx context.Context, 66 req *Request, 67 desc *backup.BackupDescriptor, 68 store nodeStore, 69 ) (CanCommitResponse, error) { 70 expiration := req.Duration 71 if expiration > _TimeoutShardCommit { 72 expiration = _TimeoutShardCommit 73 } 74 ret := CanCommitResponse{ 75 Method: OpCreate, 76 ID: req.ID, 77 Timeout: expiration, 78 } 79 80 destPath := store.HomeDir() 81 82 // make sure there is no active restore 83 if prevID := r.lastOp.renew(req.ID, destPath); prevID != "" { 84 err := fmt.Errorf("restore %s already in progress", prevID) 85 return ret, err 86 } 87 r.waitingForCoordinatorToCommit.Store(true) // is set to false by wait() 88 89 f := func() { 90 var err error 91 status := Status{ 92 Path: destPath, 93 StartedAt: time.Now().UTC(), 94 Status: backup.Transferring, 95 } 96 defer func() { 97 status.CompletedAt = time.Now().UTC() 98 if err == nil { 99 status.Status = backup.Success 100 } else { 101 status.Err = err.Error() 102 status.Status = backup.Failed 103 } 104 r.restoreStatusMap.Store(basePath(req.Backend, req.ID), status) 105 r.lastOp.reset() 106 }() 107 108 if err = r.waitForCoordinator(expiration, req.ID); err != nil { 109 r.logger.WithField("action", "create_backup"). 110 Error(err) 111 r.lastAsyncError = err 112 return 113 } 114 115 err = r.restoreAll(context.Background(), desc, req.CPUPercentage, store, req.NodeMapping) 116 logFields := logrus.Fields{"action": "restore", "backup_id": req.ID} 117 if err != nil { 118 r.logger.WithFields(logFields).Error(err) 119 } else { 120 r.logger.WithFields(logFields).Info("backup restored successfully") 121 } 122 } 123 enterrors.GoWrapper(f, r.logger) 124 125 return ret, nil 126 } 127 128 func (r *restorer) restoreAll(ctx context.Context, 129 desc *backup.BackupDescriptor, cpuPercentage int, 130 store nodeStore, nodeMapping map[string]string, 131 ) (err error) { 132 compressed := desc.Version > version1 133 r.lastOp.set(backup.Transferring) 134 for _, cdesc := range desc.Classes { 135 if err := r.restoreOne(ctx, &cdesc, desc.ServerVersion, compressed, cpuPercentage, store, nodeMapping); err != nil { 136 return fmt.Errorf("restore class %s: %w", cdesc.Name, err) 137 } 138 r.logger.WithField("action", "restore"). 139 WithField("backup_id", desc.ID). 140 WithField("class", cdesc.Name).Info("successfully restored") 141 } 142 return nil 143 } 144 145 func getType(myvar interface{}) string { 146 if t := reflect.TypeOf(myvar); t.Kind() == reflect.Ptr { 147 return "*" + t.Elem().Name() 148 } else { 149 return t.Name() 150 } 151 } 152 153 func (r *restorer) restoreOne(ctx context.Context, 154 desc *backup.ClassDescriptor, serverVersion string, 155 compressed bool, cpuPercentage int, store nodeStore, nodeMapping map[string]string, 156 ) (err error) { 157 classLabel := desc.Name 158 if monitoring.GetMetrics().Group { 159 classLabel = "n/a" 160 } 161 metric, err := monitoring.GetMetrics().BackupRestoreDurations.GetMetricWithLabelValues(getType(store.b), classLabel) 162 if err != nil { 163 timer := prometheus.NewTimer(metric) 164 defer timer.ObserveDuration() 165 } 166 167 if r.sourcer.ClassExists(desc.Name) { 168 return fmt.Errorf("already exists") 169 } 170 fw := newFileWriter(r.sourcer, store, compressed, r.logger). 171 WithPoolPercentage(cpuPercentage) 172 173 // Pre-v1.23 versions store files in a flat format 174 if serverVersion < "1.23" { 175 f, err := hfsMigrator(desc, r.node, serverVersion) 176 if err != nil { 177 return fmt.Errorf("migrate to pre 1.23: %w", err) 178 } 179 fw.setMigrator(f) 180 } 181 182 rollback, err := fw.Write(ctx, desc) 183 if err != nil { 184 return fmt.Errorf("write files: %w", err) 185 } 186 if err := r.schema.RestoreClass(ctx, desc, nodeMapping); err != nil { 187 if rerr := rollback(); rerr != nil { 188 r.logger.WithField("className", desc.Name).WithField("action", "rollback").Error(rerr) 189 } 190 return fmt.Errorf("restore schema: %w", err) 191 } 192 return nil 193 } 194 195 func (r *restorer) status(backend, ID string) (Status, error) { 196 if st := r.lastOp.get(); st.ID == ID { 197 return Status{ 198 Path: st.Path, 199 StartedAt: st.Starttime, 200 Status: st.Status, 201 }, nil 202 } 203 ref := basePath(backend, ID) 204 istatus, ok := r.restoreStatusMap.Load(ref) 205 if !ok { 206 err := fmt.Errorf("status not found: %s", ref) 207 return Status{}, backup.NewErrNotFound(err) 208 } 209 return istatus.(Status), nil 210 } 211 212 func (r *restorer) validate(ctx context.Context, store *nodeStore, req *Request) (*backup.BackupDescriptor, []string, error) { 213 destPath := store.HomeDir() 214 meta, err := store.Meta(ctx, req.ID, true) 215 if err != nil { 216 nerr := backup.ErrNotFound{} 217 if errors.As(err, &nerr) { 218 return nil, nil, fmt.Errorf("restorer cannot validate: %w: %q (%w)", errMetaNotFound, destPath, err) 219 } 220 return nil, nil, fmt.Errorf("find backup %s: %w", destPath, err) 221 } 222 if meta.ID != req.ID { 223 return nil, nil, fmt.Errorf("wrong backup file: expected %q got %q", req.ID, meta.ID) 224 } 225 if meta.Status != string(backup.Success) { 226 err = fmt.Errorf("invalid backup %s status: %s", destPath, meta.Status) 227 return nil, nil, err 228 } 229 if err := meta.Validate(meta.Version > version1); err != nil { 230 return nil, nil, fmt.Errorf("corrupted backup file: %w", err) 231 } 232 if v := meta.Version; v > Version { 233 return nil, nil, fmt.Errorf("%s: %s > %s", errMsgHigherVersion, v, Version) 234 } 235 cs := meta.List() 236 if len(req.Classes) > 0 { 237 if first := meta.AllExist(req.Classes); first != "" { 238 err = fmt.Errorf("class %s doesn't exist in the backup, but does have %v: ", first, cs) 239 return nil, cs, err 240 } 241 meta.Include(req.Classes) 242 } 243 return meta, cs, nil 244 } 245 246 // oneClassSchema allows for creating schema with one class 247 // This is required when migrating to hierarchical file structure from pre-v1.23 248 type oneClassSchema struct { 249 cls *models.Class 250 ss *sharding.State 251 } 252 253 func (s oneClassSchema) CopyShardingState(class string) *sharding.State { 254 return s.ss 255 } 256 257 func (s oneClassSchema) GetSchemaSkipAuth() schema.Schema { 258 return schema.Schema{ 259 Objects: &models.Schema{ 260 Classes: []*models.Class{s.cls}, 261 }, 262 } 263 } 264 265 // hfsMigrator builds and return a class migrator ready for use 266 func hfsMigrator(desc *backup.ClassDescriptor, nodeName string, serverVersion string) (func(classDir string) error, error) { 267 if serverVersion >= "1.23" { 268 return func(string) error { return nil }, nil 269 } 270 var ss sharding.State 271 if desc.ShardingState != nil { 272 err := json.Unmarshal(desc.ShardingState, &ss) 273 if err != nil { 274 return nil, fmt.Errorf("marshal sharding state: %w", err) 275 } 276 } 277 ss.SetLocalName(nodeName) 278 279 // get schema and sharding state 280 class := &models.Class{} 281 if err := json.Unmarshal(desc.Schema, &class); err != nil { 282 return nil, fmt.Errorf("marshal class schema: %w", err) 283 } 284 285 return func(classDir string) error { 286 return migratefs.MigrateToHierarchicalFS(classDir, oneClassSchema{class, &ss}) 287 }, nil 288 }