github.com/weaviate/weaviate@v1.24.6/entities/backup/descriptor.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package backup
    13  
    14  import (
    15  	"fmt"
    16  	"time"
    17  )
    18  
    19  // NodeDescriptor contains data related to one participant in DBRO
    20  type NodeDescriptor struct {
    21  	Classes []string `json:"classes"`
    22  	Status  Status   `json:"status"`
    23  	Error   string   `json:"error"`
    24  }
    25  
    26  // DistributedBAckupDescriptor contains everything need to completely restore a distributed backup
    27  type DistributedBackupDescriptor struct {
    28  	StartedAt     time.Time                  `json:"startedAt"`
    29  	CompletedAt   time.Time                  `json:"completedAt"`
    30  	ID            string                     `json:"id"` // User created backup id
    31  	Nodes         map[string]*NodeDescriptor `json:"nodes"`
    32  	NodeMapping   map[string]string          `json:"node_mapping"`
    33  	Status        Status                     `json:"status"`  //
    34  	Version       string                     `json:"version"` //
    35  	ServerVersion string                     `json:"serverVersion"`
    36  	Error         string                     `json:"error"`
    37  }
    38  
    39  // Len returns how many nodes exist in d
    40  func (d *DistributedBackupDescriptor) Len() int {
    41  	return len(d.Nodes)
    42  }
    43  
    44  // Count number of classes
    45  func (d *DistributedBackupDescriptor) Count() int {
    46  	count := 0
    47  	for _, desc := range d.Nodes {
    48  		count += len(desc.Classes)
    49  	}
    50  	return count
    51  }
    52  
    53  // RemoveEmpty removes any nodes with an empty class list
    54  func (d *DistributedBackupDescriptor) RemoveEmpty() *DistributedBackupDescriptor {
    55  	for node, desc := range d.Nodes {
    56  		if len(desc.Classes) == 0 {
    57  			delete(d.Nodes, node)
    58  		}
    59  	}
    60  	return d
    61  }
    62  
    63  // Classes returns all classes contained in d
    64  func (d *DistributedBackupDescriptor) Classes() []string {
    65  	set := make(map[string]struct{}, 32)
    66  	for _, desc := range d.Nodes {
    67  		for _, cls := range desc.Classes {
    68  			set[cls] = struct{}{}
    69  		}
    70  	}
    71  	lst := make([]string, len(set))
    72  	i := 0
    73  	for cls := range set {
    74  		lst[i] = cls
    75  		i++
    76  	}
    77  	return lst
    78  }
    79  
    80  // Filter classes based on predicate
    81  func (d *DistributedBackupDescriptor) Filter(pred func(s string) bool) {
    82  	for _, desc := range d.Nodes {
    83  		cs := make([]string, 0, len(desc.Classes))
    84  		for _, cls := range desc.Classes {
    85  			if pred(cls) {
    86  				cs = append(cs, cls)
    87  			}
    88  		}
    89  		if len(cs) != len(desc.Classes) {
    90  			desc.Classes = cs
    91  		}
    92  	}
    93  }
    94  
    95  // Include only these classes and remove everything else
    96  func (d *DistributedBackupDescriptor) Include(classes []string) {
    97  	if len(classes) == 0 {
    98  		return
    99  	}
   100  	set := make(map[string]struct{}, len(classes))
   101  	for _, cls := range classes {
   102  		set[cls] = struct{}{}
   103  	}
   104  	pred := func(s string) bool {
   105  		_, ok := set[s]
   106  		return ok
   107  	}
   108  	d.Filter(pred)
   109  }
   110  
   111  // Exclude removes classes from d
   112  func (d *DistributedBackupDescriptor) Exclude(classes []string) {
   113  	if len(classes) == 0 {
   114  		return
   115  	}
   116  	set := make(map[string]struct{}, len(classes))
   117  	for _, cls := range classes {
   118  		set[cls] = struct{}{}
   119  	}
   120  	pred := func(s string) bool {
   121  		_, ok := set[s]
   122  		return !ok
   123  	}
   124  	d.Filter(pred)
   125  }
   126  
   127  // ToMappedNodeName will return nodeName after applying d.NodeMapping translation on it.
   128  // If nodeName is not contained in d.nodeMapping, returns nodeName unmodified
   129  func (d *DistributedBackupDescriptor) ToMappedNodeName(nodeName string) string {
   130  	if newNodeName, ok := d.NodeMapping[nodeName]; ok {
   131  		return newNodeName
   132  	}
   133  	return nodeName
   134  }
   135  
   136  // ToOriginalNodeName will return nodeName after trying to find an original node name from d.NodeMapping values.
   137  // If nodeName is not contained in d.nodeMapping values, returns nodeName unmodified
   138  func (d *DistributedBackupDescriptor) ToOriginalNodeName(nodeName string) string {
   139  	for oldNodeName, newNodeName := range d.NodeMapping {
   140  		if newNodeName == nodeName {
   141  			return oldNodeName
   142  		}
   143  	}
   144  	return nodeName
   145  }
   146  
   147  // ApplyNodeMapping applies d.NodeMapping translation to d.Nodes. If a node in d.Nodes is not translated by d.NodeMapping, it will remain
   148  // unchanged.
   149  func (d *DistributedBackupDescriptor) ApplyNodeMapping() {
   150  	if len(d.NodeMapping) == 0 {
   151  		return
   152  	}
   153  
   154  	for k, v := range d.NodeMapping {
   155  		if nodeDescriptor, ok := d.Nodes[k]; !ok {
   156  			d.Nodes[v] = nodeDescriptor
   157  			delete(d.Nodes, k)
   158  		}
   159  	}
   160  }
   161  
   162  // AllExist checks if all classes exist in d.
   163  // It returns either "" or the first class which it could not find
   164  func (d *DistributedBackupDescriptor) AllExist(classes []string) string {
   165  	if len(classes) == 0 {
   166  		return ""
   167  	}
   168  	set := make(map[string]struct{}, len(classes))
   169  	for _, cls := range classes {
   170  		set[cls] = struct{}{}
   171  	}
   172  	for _, dest := range d.Nodes {
   173  		for _, cls := range dest.Classes {
   174  			delete(set, cls)
   175  			if len(set) == 0 {
   176  				return ""
   177  			}
   178  		}
   179  	}
   180  	first := ""
   181  	for k := range set {
   182  		first = k
   183  		break
   184  	}
   185  	return first
   186  }
   187  
   188  func (d *DistributedBackupDescriptor) Validate() error {
   189  	if d.StartedAt.IsZero() || d.ID == "" ||
   190  		d.Version == "" || d.ServerVersion == "" || d.Error != "" {
   191  		return fmt.Errorf("attribute mismatch: [id versions time error]")
   192  	}
   193  	if len(d.Nodes) == 0 {
   194  		return fmt.Errorf("empty list of node descriptors")
   195  	}
   196  	return nil
   197  }
   198  
   199  // resetStatus sets status and sub-statuses to Started
   200  // It also empties error and sub-errors
   201  func (d *DistributedBackupDescriptor) ResetStatus() *DistributedBackupDescriptor {
   202  	d.Status = Started
   203  	d.Error = ""
   204  	d.StartedAt = time.Now()
   205  	d.CompletedAt = time.Time{}
   206  	for _, node := range d.Nodes {
   207  		node.Status = Started
   208  		node.Error = ""
   209  	}
   210  	return d
   211  }
   212  
   213  // ShardDescriptor contains everything needed to completely restore a partition of a specific class
   214  type ShardDescriptor struct {
   215  	Name  string   `json:"name"`
   216  	Node  string   `json:"node"`
   217  	Files []string `json:"files,omitempty"`
   218  
   219  	DocIDCounterPath      string `json:"docIdCounterPath,omitempty"`
   220  	DocIDCounter          []byte `json:"docIdCounter,omitempty"`
   221  	PropLengthTrackerPath string `json:"propLengthTrackerPath,omitempty"`
   222  	PropLengthTracker     []byte `json:"propLengthTracker,omitempty"`
   223  	ShardVersionPath      string `json:"shardVersionPath,omitempty"`
   224  	Version               []byte `json:"version,omitempty"`
   225  	Chunk                 int32  `json:"chunk"`
   226  }
   227  
   228  // ClearTemporary clears fields that are no longer needed once compression is done.
   229  // These fields are not required in versions > 1 because they are stored in the tarball.
   230  func (s *ShardDescriptor) ClearTemporary() {
   231  	s.ShardVersionPath = ""
   232  	s.Version = nil
   233  
   234  	s.DocIDCounterPath = ""
   235  	s.DocIDCounter = nil
   236  
   237  	s.PropLengthTrackerPath = ""
   238  	s.PropLengthTracker = nil
   239  }
   240  
   241  // ClassDescriptor contains everything needed to completely restore a class
   242  type ClassDescriptor struct {
   243  	Name          string             `json:"name"` // DB class name, also selected by user
   244  	Shards        []*ShardDescriptor `json:"shards"`
   245  	ShardingState []byte             `json:"shardingState"`
   246  	Schema        []byte             `json:"schema"`
   247  	Chunks        map[int32][]string `json:"chunks,omitempty"`
   248  	Error         error              `json:"-"`
   249  }
   250  
   251  // BackupDescriptor contains everything needed to completely restore a list of classes
   252  type BackupDescriptor struct {
   253  	StartedAt     time.Time         `json:"startedAt"`
   254  	CompletedAt   time.Time         `json:"completedAt"`
   255  	ID            string            `json:"id"` // User created backup id
   256  	Classes       []ClassDescriptor `json:"classes"`
   257  	Status        string            `json:"status"`  // "STARTED|TRANSFERRING|TRANSFERRED|SUCCESS|FAILED"
   258  	Version       string            `json:"version"` //
   259  	ServerVersion string            `json:"serverVersion"`
   260  	Error         string            `json:"error"`
   261  }
   262  
   263  // List all existing classes in d
   264  func (d *BackupDescriptor) List() []string {
   265  	lst := make([]string, len(d.Classes))
   266  	for i, cls := range d.Classes {
   267  		lst[i] = cls.Name
   268  	}
   269  	return lst
   270  }
   271  
   272  // AllExist checks if all classes exist in d.
   273  // It returns either "" or the first class which it could not find
   274  func (d *BackupDescriptor) AllExist(classes []string) string {
   275  	if len(classes) == 0 {
   276  		return ""
   277  	}
   278  	set := make(map[string]struct{}, len(classes))
   279  	for _, cls := range classes {
   280  		set[cls] = struct{}{}
   281  	}
   282  	for _, dest := range d.Classes {
   283  		delete(set, dest.Name)
   284  	}
   285  	first := ""
   286  	for k := range set {
   287  		first = k
   288  		break
   289  	}
   290  	return first
   291  }
   292  
   293  // Include only these classes and remove everything else
   294  func (d *BackupDescriptor) Include(classes []string) {
   295  	if len(classes) == 0 {
   296  		return
   297  	}
   298  	set := make(map[string]struct{}, len(classes))
   299  	for _, cls := range classes {
   300  		set[cls] = struct{}{}
   301  	}
   302  	pred := func(s string) bool {
   303  		_, ok := set[s]
   304  		return ok
   305  	}
   306  	d.Filter(pred)
   307  }
   308  
   309  // Exclude removes classes from d
   310  func (d *BackupDescriptor) Exclude(classes []string) {
   311  	if len(classes) == 0 {
   312  		return
   313  	}
   314  	set := make(map[string]struct{}, len(classes))
   315  	for _, cls := range classes {
   316  		set[cls] = struct{}{}
   317  	}
   318  	pred := func(s string) bool {
   319  		_, ok := set[s]
   320  		return !ok
   321  	}
   322  	d.Filter(pred)
   323  }
   324  
   325  // Filter classes based on predicate
   326  func (d *BackupDescriptor) Filter(pred func(s string) bool) {
   327  	cs := make([]ClassDescriptor, 0, len(d.Classes))
   328  	for _, dest := range d.Classes {
   329  		if pred(dest.Name) {
   330  			cs = append(cs, dest)
   331  		}
   332  	}
   333  	d.Classes = cs
   334  }
   335  
   336  // ValidateV1 validates d
   337  func (d *BackupDescriptor) validateV1() error {
   338  	for _, c := range d.Classes {
   339  		if c.Name == "" || len(c.Schema) == 0 || len(c.ShardingState) == 0 {
   340  			return fmt.Errorf("invalid class %q: [name schema sharding]", c.Name)
   341  		}
   342  		for _, s := range c.Shards {
   343  			n := len(s.Files)
   344  			if s.Name == "" || s.Node == "" || s.DocIDCounterPath == "" ||
   345  				s.ShardVersionPath == "" || s.PropLengthTrackerPath == "" ||
   346  				(n > 0 && (len(s.DocIDCounter) == 0 ||
   347  					len(s.PropLengthTracker) == 0 ||
   348  					len(s.Version) == 0)) {
   349  				return fmt.Errorf("invalid shard %q.%q", c.Name, s.Name)
   350  			}
   351  			for i, fpath := range s.Files {
   352  				if fpath == "" {
   353  					return fmt.Errorf("invalid shard %q.%q: file number %d", c.Name, s.Name, i)
   354  				}
   355  			}
   356  		}
   357  	}
   358  	return nil
   359  }
   360  
   361  func (d *BackupDescriptor) Validate(newSchema bool) error {
   362  	if d.StartedAt.IsZero() || d.ID == "" ||
   363  		d.Version == "" || d.ServerVersion == "" || d.Error != "" {
   364  		return fmt.Errorf("attribute mismatch: [id versions time error]")
   365  	}
   366  	if !newSchema {
   367  		return d.validateV1()
   368  	}
   369  	for _, c := range d.Classes {
   370  		if c.Name == "" || len(c.Schema) == 0 || len(c.ShardingState) == 0 {
   371  			return fmt.Errorf("class=%q: invalid attributes [name schema sharding]", c.Name)
   372  		}
   373  		for _, s := range c.Shards {
   374  			if s.Name == "" || s.Node == "" {
   375  				return fmt.Errorf("class=%q: invalid shard %q node=%q", c.Name, s.Name, s.Node)
   376  			}
   377  		}
   378  	}
   379  	return nil
   380  }
   381  
   382  // ToDistributed is used just for backward compatibility with the old version.
   383  func (d *BackupDescriptor) ToDistributed() *DistributedBackupDescriptor {
   384  	node, cs := "", d.List()
   385  	for _, xs := range d.Classes {
   386  		for _, s := range xs.Shards {
   387  			node = s.Node
   388  		}
   389  	}
   390  	result := &DistributedBackupDescriptor{
   391  		StartedAt:     d.StartedAt,
   392  		CompletedAt:   d.CompletedAt,
   393  		ID:            d.ID,
   394  		Status:        Status(d.Status),
   395  		Version:       d.Version,
   396  		ServerVersion: d.ServerVersion,
   397  		Error:         d.Error,
   398  	}
   399  	if node != "" && len(cs) > 0 {
   400  		result.Nodes = map[string]*NodeDescriptor{node: {Classes: cs}}
   401  	}
   402  	return result
   403  }