github.com/weaviate/weaviate@v1.24.6/entities/backup/descriptor.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package backup 13 14 import ( 15 "fmt" 16 "time" 17 ) 18 19 // NodeDescriptor contains data related to one participant in DBRO 20 type NodeDescriptor struct { 21 Classes []string `json:"classes"` 22 Status Status `json:"status"` 23 Error string `json:"error"` 24 } 25 26 // DistributedBAckupDescriptor contains everything need to completely restore a distributed backup 27 type DistributedBackupDescriptor struct { 28 StartedAt time.Time `json:"startedAt"` 29 CompletedAt time.Time `json:"completedAt"` 30 ID string `json:"id"` // User created backup id 31 Nodes map[string]*NodeDescriptor `json:"nodes"` 32 NodeMapping map[string]string `json:"node_mapping"` 33 Status Status `json:"status"` // 34 Version string `json:"version"` // 35 ServerVersion string `json:"serverVersion"` 36 Error string `json:"error"` 37 } 38 39 // Len returns how many nodes exist in d 40 func (d *DistributedBackupDescriptor) Len() int { 41 return len(d.Nodes) 42 } 43 44 // Count number of classes 45 func (d *DistributedBackupDescriptor) Count() int { 46 count := 0 47 for _, desc := range d.Nodes { 48 count += len(desc.Classes) 49 } 50 return count 51 } 52 53 // RemoveEmpty removes any nodes with an empty class list 54 func (d *DistributedBackupDescriptor) RemoveEmpty() *DistributedBackupDescriptor { 55 for node, desc := range d.Nodes { 56 if len(desc.Classes) == 0 { 57 delete(d.Nodes, node) 58 } 59 } 60 return d 61 } 62 63 // Classes returns all classes contained in d 64 func (d *DistributedBackupDescriptor) Classes() []string { 65 set := make(map[string]struct{}, 32) 66 for _, desc := range d.Nodes { 67 for _, cls := range desc.Classes { 68 set[cls] = struct{}{} 69 } 70 } 71 lst := make([]string, len(set)) 72 i := 0 73 for cls := range set { 74 lst[i] = cls 75 i++ 76 } 77 return lst 78 } 79 80 // Filter classes based on predicate 81 func (d *DistributedBackupDescriptor) Filter(pred func(s string) bool) { 82 for _, desc := range d.Nodes { 83 cs := make([]string, 0, len(desc.Classes)) 84 for _, cls := range desc.Classes { 85 if pred(cls) { 86 cs = append(cs, cls) 87 } 88 } 89 if len(cs) != len(desc.Classes) { 90 desc.Classes = cs 91 } 92 } 93 } 94 95 // Include only these classes and remove everything else 96 func (d *DistributedBackupDescriptor) Include(classes []string) { 97 if len(classes) == 0 { 98 return 99 } 100 set := make(map[string]struct{}, len(classes)) 101 for _, cls := range classes { 102 set[cls] = struct{}{} 103 } 104 pred := func(s string) bool { 105 _, ok := set[s] 106 return ok 107 } 108 d.Filter(pred) 109 } 110 111 // Exclude removes classes from d 112 func (d *DistributedBackupDescriptor) Exclude(classes []string) { 113 if len(classes) == 0 { 114 return 115 } 116 set := make(map[string]struct{}, len(classes)) 117 for _, cls := range classes { 118 set[cls] = struct{}{} 119 } 120 pred := func(s string) bool { 121 _, ok := set[s] 122 return !ok 123 } 124 d.Filter(pred) 125 } 126 127 // ToMappedNodeName will return nodeName after applying d.NodeMapping translation on it. 128 // If nodeName is not contained in d.nodeMapping, returns nodeName unmodified 129 func (d *DistributedBackupDescriptor) ToMappedNodeName(nodeName string) string { 130 if newNodeName, ok := d.NodeMapping[nodeName]; ok { 131 return newNodeName 132 } 133 return nodeName 134 } 135 136 // ToOriginalNodeName will return nodeName after trying to find an original node name from d.NodeMapping values. 137 // If nodeName is not contained in d.nodeMapping values, returns nodeName unmodified 138 func (d *DistributedBackupDescriptor) ToOriginalNodeName(nodeName string) string { 139 for oldNodeName, newNodeName := range d.NodeMapping { 140 if newNodeName == nodeName { 141 return oldNodeName 142 } 143 } 144 return nodeName 145 } 146 147 // ApplyNodeMapping applies d.NodeMapping translation to d.Nodes. If a node in d.Nodes is not translated by d.NodeMapping, it will remain 148 // unchanged. 149 func (d *DistributedBackupDescriptor) ApplyNodeMapping() { 150 if len(d.NodeMapping) == 0 { 151 return 152 } 153 154 for k, v := range d.NodeMapping { 155 if nodeDescriptor, ok := d.Nodes[k]; !ok { 156 d.Nodes[v] = nodeDescriptor 157 delete(d.Nodes, k) 158 } 159 } 160 } 161 162 // AllExist checks if all classes exist in d. 163 // It returns either "" or the first class which it could not find 164 func (d *DistributedBackupDescriptor) AllExist(classes []string) string { 165 if len(classes) == 0 { 166 return "" 167 } 168 set := make(map[string]struct{}, len(classes)) 169 for _, cls := range classes { 170 set[cls] = struct{}{} 171 } 172 for _, dest := range d.Nodes { 173 for _, cls := range dest.Classes { 174 delete(set, cls) 175 if len(set) == 0 { 176 return "" 177 } 178 } 179 } 180 first := "" 181 for k := range set { 182 first = k 183 break 184 } 185 return first 186 } 187 188 func (d *DistributedBackupDescriptor) Validate() error { 189 if d.StartedAt.IsZero() || d.ID == "" || 190 d.Version == "" || d.ServerVersion == "" || d.Error != "" { 191 return fmt.Errorf("attribute mismatch: [id versions time error]") 192 } 193 if len(d.Nodes) == 0 { 194 return fmt.Errorf("empty list of node descriptors") 195 } 196 return nil 197 } 198 199 // resetStatus sets status and sub-statuses to Started 200 // It also empties error and sub-errors 201 func (d *DistributedBackupDescriptor) ResetStatus() *DistributedBackupDescriptor { 202 d.Status = Started 203 d.Error = "" 204 d.StartedAt = time.Now() 205 d.CompletedAt = time.Time{} 206 for _, node := range d.Nodes { 207 node.Status = Started 208 node.Error = "" 209 } 210 return d 211 } 212 213 // ShardDescriptor contains everything needed to completely restore a partition of a specific class 214 type ShardDescriptor struct { 215 Name string `json:"name"` 216 Node string `json:"node"` 217 Files []string `json:"files,omitempty"` 218 219 DocIDCounterPath string `json:"docIdCounterPath,omitempty"` 220 DocIDCounter []byte `json:"docIdCounter,omitempty"` 221 PropLengthTrackerPath string `json:"propLengthTrackerPath,omitempty"` 222 PropLengthTracker []byte `json:"propLengthTracker,omitempty"` 223 ShardVersionPath string `json:"shardVersionPath,omitempty"` 224 Version []byte `json:"version,omitempty"` 225 Chunk int32 `json:"chunk"` 226 } 227 228 // ClearTemporary clears fields that are no longer needed once compression is done. 229 // These fields are not required in versions > 1 because they are stored in the tarball. 230 func (s *ShardDescriptor) ClearTemporary() { 231 s.ShardVersionPath = "" 232 s.Version = nil 233 234 s.DocIDCounterPath = "" 235 s.DocIDCounter = nil 236 237 s.PropLengthTrackerPath = "" 238 s.PropLengthTracker = nil 239 } 240 241 // ClassDescriptor contains everything needed to completely restore a class 242 type ClassDescriptor struct { 243 Name string `json:"name"` // DB class name, also selected by user 244 Shards []*ShardDescriptor `json:"shards"` 245 ShardingState []byte `json:"shardingState"` 246 Schema []byte `json:"schema"` 247 Chunks map[int32][]string `json:"chunks,omitempty"` 248 Error error `json:"-"` 249 } 250 251 // BackupDescriptor contains everything needed to completely restore a list of classes 252 type BackupDescriptor struct { 253 StartedAt time.Time `json:"startedAt"` 254 CompletedAt time.Time `json:"completedAt"` 255 ID string `json:"id"` // User created backup id 256 Classes []ClassDescriptor `json:"classes"` 257 Status string `json:"status"` // "STARTED|TRANSFERRING|TRANSFERRED|SUCCESS|FAILED" 258 Version string `json:"version"` // 259 ServerVersion string `json:"serverVersion"` 260 Error string `json:"error"` 261 } 262 263 // List all existing classes in d 264 func (d *BackupDescriptor) List() []string { 265 lst := make([]string, len(d.Classes)) 266 for i, cls := range d.Classes { 267 lst[i] = cls.Name 268 } 269 return lst 270 } 271 272 // AllExist checks if all classes exist in d. 273 // It returns either "" or the first class which it could not find 274 func (d *BackupDescriptor) AllExist(classes []string) string { 275 if len(classes) == 0 { 276 return "" 277 } 278 set := make(map[string]struct{}, len(classes)) 279 for _, cls := range classes { 280 set[cls] = struct{}{} 281 } 282 for _, dest := range d.Classes { 283 delete(set, dest.Name) 284 } 285 first := "" 286 for k := range set { 287 first = k 288 break 289 } 290 return first 291 } 292 293 // Include only these classes and remove everything else 294 func (d *BackupDescriptor) Include(classes []string) { 295 if len(classes) == 0 { 296 return 297 } 298 set := make(map[string]struct{}, len(classes)) 299 for _, cls := range classes { 300 set[cls] = struct{}{} 301 } 302 pred := func(s string) bool { 303 _, ok := set[s] 304 return ok 305 } 306 d.Filter(pred) 307 } 308 309 // Exclude removes classes from d 310 func (d *BackupDescriptor) Exclude(classes []string) { 311 if len(classes) == 0 { 312 return 313 } 314 set := make(map[string]struct{}, len(classes)) 315 for _, cls := range classes { 316 set[cls] = struct{}{} 317 } 318 pred := func(s string) bool { 319 _, ok := set[s] 320 return !ok 321 } 322 d.Filter(pred) 323 } 324 325 // Filter classes based on predicate 326 func (d *BackupDescriptor) Filter(pred func(s string) bool) { 327 cs := make([]ClassDescriptor, 0, len(d.Classes)) 328 for _, dest := range d.Classes { 329 if pred(dest.Name) { 330 cs = append(cs, dest) 331 } 332 } 333 d.Classes = cs 334 } 335 336 // ValidateV1 validates d 337 func (d *BackupDescriptor) validateV1() error { 338 for _, c := range d.Classes { 339 if c.Name == "" || len(c.Schema) == 0 || len(c.ShardingState) == 0 { 340 return fmt.Errorf("invalid class %q: [name schema sharding]", c.Name) 341 } 342 for _, s := range c.Shards { 343 n := len(s.Files) 344 if s.Name == "" || s.Node == "" || s.DocIDCounterPath == "" || 345 s.ShardVersionPath == "" || s.PropLengthTrackerPath == "" || 346 (n > 0 && (len(s.DocIDCounter) == 0 || 347 len(s.PropLengthTracker) == 0 || 348 len(s.Version) == 0)) { 349 return fmt.Errorf("invalid shard %q.%q", c.Name, s.Name) 350 } 351 for i, fpath := range s.Files { 352 if fpath == "" { 353 return fmt.Errorf("invalid shard %q.%q: file number %d", c.Name, s.Name, i) 354 } 355 } 356 } 357 } 358 return nil 359 } 360 361 func (d *BackupDescriptor) Validate(newSchema bool) error { 362 if d.StartedAt.IsZero() || d.ID == "" || 363 d.Version == "" || d.ServerVersion == "" || d.Error != "" { 364 return fmt.Errorf("attribute mismatch: [id versions time error]") 365 } 366 if !newSchema { 367 return d.validateV1() 368 } 369 for _, c := range d.Classes { 370 if c.Name == "" || len(c.Schema) == 0 || len(c.ShardingState) == 0 { 371 return fmt.Errorf("class=%q: invalid attributes [name schema sharding]", c.Name) 372 } 373 for _, s := range c.Shards { 374 if s.Name == "" || s.Node == "" { 375 return fmt.Errorf("class=%q: invalid shard %q node=%q", c.Name, s.Name, s.Node) 376 } 377 } 378 } 379 return nil 380 } 381 382 // ToDistributed is used just for backward compatibility with the old version. 383 func (d *BackupDescriptor) ToDistributed() *DistributedBackupDescriptor { 384 node, cs := "", d.List() 385 for _, xs := range d.Classes { 386 for _, s := range xs.Shards { 387 node = s.Node 388 } 389 } 390 result := &DistributedBackupDescriptor{ 391 StartedAt: d.StartedAt, 392 CompletedAt: d.CompletedAt, 393 ID: d.ID, 394 Status: Status(d.Status), 395 Version: d.Version, 396 ServerVersion: d.ServerVersion, 397 Error: d.Error, 398 } 399 if node != "" && len(cs) > 0 { 400 result.Nodes = map[string]*NodeDescriptor{node: {Classes: cs}} 401 } 402 return result 403 }