github.com/weaviate/weaviate@v1.24.6/usecases/sharding/state.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package sharding
    13  
    14  import (
    15  	"fmt"
    16  	"math"
    17  	"math/rand"
    18  	"sort"
    19  
    20  	"github.com/spaolacci/murmur3"
    21  	"github.com/weaviate/weaviate/entities/schema"
    22  	"github.com/weaviate/weaviate/usecases/cluster"
    23  )
    24  
// shardNameLength is the number of characters in a name produced by
// generateShardName.
const shardNameLength = 12
    26  
// State holds the sharding layout of a single index: the sharding config, the
// physical shards and the virtual shards that are mapped onto them.
type State struct {
	IndexID             string              `json:"indexID"` // for monitoring, reporting purposes. Does not influence the shard-calculations
	Config              Config              `json:"config"`
	// Physical holds the physical shards keyed by shard name.
	Physical            map[string]Physical `json:"physical"`
	// Virtual holds the virtual shards; initVirtual stores them sorted by
	// ascending Upper.
	Virtual             []Virtual           `json:"virtual"`
	// PartitioningEnabled makes Shard resolve the partition key directly
	// against Physical instead of hashing the object ID.
	PartitioningEnabled bool                `json:"partitioningEnabled"`

	// different for each node, not to be serialized
	localNodeName string // TODO: localNodeName is static it is better to store just once
}
    37  
    38  // MigrateFromOldFormat checks if the old (pre-v1.17) format was used and
    39  // migrates it into the new format for backward-compatibility with all classes
    40  // created before v1.17
    41  func (s *State) MigrateFromOldFormat() {
    42  	for shardName, shard := range s.Physical {
    43  		if shard.LegacyBelongsToNodeForBackwardCompat != "" && len(shard.BelongsToNodes) == 0 {
    44  			shard.BelongsToNodes = []string{
    45  				shard.LegacyBelongsToNodeForBackwardCompat,
    46  			}
    47  			shard.LegacyBelongsToNodeForBackwardCompat = ""
    48  		}
    49  		s.Physical[shardName] = shard
    50  	}
    51  }
    52  
// Virtual is a virtual shard: a slice of the uint64 token space that is
// assigned to exactly one physical shard.
type Virtual struct {
	// Name is the generated name of the virtual shard.
	Name               string  `json:"name"`
	// Upper is the murmur3 hash of Name (see initVirtual); virtualByToken
	// treats it as the inclusive upper bound of the tokens this shard owns.
	Upper              uint64  `json:"upper"`
	// OwnsPercentage is the owned token count divided by math.MaxUint64,
	// i.e. a fraction, not a value scaled to 100.
	OwnsPercentage     float64 `json:"ownsPercentage"`
	// AssignedToPhysical is the name of the physical shard this virtual
	// shard was assigned to.
	AssignedToPhysical string  `json:"assignedToPhysical"`
}
    59  
// Physical is a physical shard together with the virtual shards it owns and
// the nodes it is placed on.
type Physical struct {
	// Name is the shard name.
	Name           string   `json:"name"`
	// OwnsVirtual lists the names of the virtual shards assigned to this
	// physical shard.
	OwnsVirtual    []string `json:"ownsVirtual,omitempty"`
	// OwnsPercentage is the sum of the OwnsPercentage of all owned virtual
	// shards; AddPartition sets it to 1.0.
	OwnsPercentage float64  `json:"ownsPercentage"`

	// LegacyBelongsToNodeForBackwardCompat is the single owning node of the
	// pre-v1.17 format; MigrateFromOldFormat moves it into BelongsToNodes.
	LegacyBelongsToNodeForBackwardCompat string   `json:"belongsToNode,omitempty"`
	// BelongsToNodes lists the nodes holding this shard (its replica set).
	BelongsToNodes                       []string `json:"belongsToNodes,omitempty"`

	// Status is the raw status string; ActivityStatus passes it through
	// schema.ActivityStatus.
	Status string `json:"status,omitempty"`
}
    70  
    71  // BelongsToNode for backward-compatibility when there was no replication. It
    72  // always returns the first node of the list
    73  func (p Physical) BelongsToNode() string {
    74  	return p.BelongsToNodes[0]
    75  }
    76  
    77  // AdjustReplicas shrinks or extends the replica set (p.BelongsToNodes)
    78  func (p *Physical) AdjustReplicas(count int, nodes nodes) error {
    79  	if count < 0 {
    80  		return fmt.Errorf("negative replication factor: %d", count)
    81  	}
    82  	// let's be defensive here and make sure available replicas are unique.
    83  	available := make(map[string]bool)
    84  	for _, n := range p.BelongsToNodes {
    85  		available[n] = true
    86  	}
    87  	// a == b should be always true except in case of bug
    88  	if b, a := len(p.BelongsToNodes), len(available); b > a {
    89  		p.BelongsToNodes = p.BelongsToNodes[:a]
    90  		i := 0
    91  		for n := range available {
    92  			p.BelongsToNodes[i] = n
    93  			i++
    94  		}
    95  	}
    96  	if count < len(p.BelongsToNodes) { // less replicas wanted
    97  		p.BelongsToNodes = p.BelongsToNodes[:count]
    98  		return nil
    99  	}
   100  
   101  	names := nodes.Candidates()
   102  	if count > len(names) {
   103  		return fmt.Errorf("not enough replicas: found %d want %d", len(names), count)
   104  	}
   105  
   106  	// make sure included nodes are unique
   107  	for _, n := range names {
   108  		if !available[n] {
   109  			p.BelongsToNodes = append(p.BelongsToNodes, n)
   110  			available[n] = true
   111  		}
   112  		if len(available) == count {
   113  			break
   114  		}
   115  	}
   116  
   117  	return nil
   118  }
   119  
// ActivityStatus returns the result of passing the shard's raw Status string
// through schema.ActivityStatus.
func (p *Physical) ActivityStatus() string {
	return schema.ActivityStatus(p.Status)
}
   123  
// nodes is the view of the cluster the sharding state needs.
type nodes interface {
	// Candidates returns the names of the nodes shards may be placed on.
	Candidates() []string
	// LocalName returns the name of the local node.
	LocalName() string
}
   128  
   129  func InitState(id string, config Config, nodes nodes, replFactor int64, partitioningEnabled bool) (*State, error) {
   130  	out := &State{
   131  		Config:              config,
   132  		IndexID:             id,
   133  		localNodeName:       nodes.LocalName(),
   134  		PartitioningEnabled: partitioningEnabled,
   135  	}
   136  	if partitioningEnabled {
   137  		out.Physical = make(map[string]Physical, 128)
   138  		return out, nil
   139  	}
   140  
   141  	names := nodes.Candidates()
   142  	if f, n := replFactor, len(names); f > int64(n) {
   143  		return nil, fmt.Errorf("not enough replicas: found %d want %d", n, f)
   144  	}
   145  
   146  	if err := out.initPhysical(names, replFactor); err != nil {
   147  		return nil, err
   148  	}
   149  	out.initVirtual()
   150  	out.distributeVirtualAmongPhysical()
   151  
   152  	return out, nil
   153  }
   154  
   155  // Shard returns the shard name if it exits and empty string otherwise
   156  func (s *State) Shard(partitionKey, objectID string) string {
   157  	if s.PartitioningEnabled {
   158  		if _, ok := s.Physical[partitionKey]; ok {
   159  			return partitionKey // will change in the future
   160  		}
   161  		return ""
   162  	}
   163  	return s.PhysicalShard([]byte(objectID))
   164  }
   165  
   166  func (s *State) PhysicalShard(in []byte) string {
   167  	if len(s.Physical) == 0 {
   168  		panic("no physical shards present")
   169  	}
   170  
   171  	if len(s.Virtual) == 0 {
   172  		panic("no virtual shards present")
   173  	}
   174  
   175  	h := murmur3.New64()
   176  	h.Write(in)
   177  	token := h.Sum64()
   178  
   179  	virtual := s.virtualByToken(token)
   180  
   181  	return virtual.AssignedToPhysical
   182  }
   183  
// CountPhysicalShards returns the number of physical shards in the state.
func (s *State) CountPhysicalShards() int {
	return len(s.Physical)
}
   188  
   189  func (s *State) AllPhysicalShards() []string {
   190  	var names []string
   191  	for _, physical := range s.Physical {
   192  		names = append(names, physical.Name)
   193  	}
   194  
   195  	sort.Slice(names, func(a, b int) bool {
   196  		return names[a] < names[b]
   197  	})
   198  
   199  	return names
   200  }
   201  
   202  func (s *State) AllLocalPhysicalShards() []string {
   203  	var names []string
   204  	for _, physical := range s.Physical {
   205  		if s.IsLocalShard(physical.Name) {
   206  			names = append(names, physical.Name)
   207  		}
   208  	}
   209  
   210  	sort.Slice(names, func(a, b int) bool {
   211  		return names[a] < names[b]
   212  	})
   213  
   214  	return names
   215  }
   216  
// SetLocalName sets the name of the local node, which IsLocalShard compares
// against the nodes of a shard.
func (s *State) SetLocalName(name string) {
	s.localNodeName = name
}
   220  
   221  func (s *State) IsLocalShard(name string) bool {
   222  	for _, node := range s.Physical[name].BelongsToNodes {
   223  		if node == s.localNodeName {
   224  			return true
   225  		}
   226  	}
   227  
   228  	return false
   229  }
   230  
   231  // initPhysical assigns shards to nodes according to the following rules:
   232  //
   233  //   - The starting point of the ring is random
   234  //   - Shard N+1's first node is the right neighbor of shard N's first node
   235  //   - If a shard has multiple nodes (replication) they are always the right
   236  //     neighbors of the first node of that shard
   237  //
   238  // Example with 3 nodes, 2 shards, replicationFactor=2:
   239  //
   240  // Shard 1: Node1, Node2
   241  // Shard 2: Node2, Node3
   242  //
   243  // Example with 3 nodes, 3 shards, replicationFactor=3:
   244  //
   245  // Shard 1: Node1, Node2, Node3
   246  // Shard 2: Node2, Node3, Node1
   247  // Shard 3: Node3, Node1, Node2
   248  //
   249  // Example with 12 nodes, 3 shards, replicationFactor=5:
   250  //
   251  // Shard 1: Node7, Node8, Node9, Node10, Node 11
   252  // Shard 2: Node8, Node9, Node10, Node 11, Node 12
   253  // Shard 3: Node9, Node10, Node11, Node 12, Node 1
   254  func (s *State) initPhysical(nodes []string, replFactor int64) error {
   255  	it, err := cluster.NewNodeIterator(nodes, cluster.StartAfter)
   256  	if err != nil {
   257  		return err
   258  	}
   259  	it.SetStartNode(nodes[len(nodes)-1])
   260  
   261  	s.Physical = map[string]Physical{}
   262  
   263  	nodeSet := make(map[string]bool)
   264  	for i := 0; i < s.Config.DesiredCount; i++ {
   265  		name := generateShardName()
   266  		shard := Physical{Name: name}
   267  		shard.BelongsToNodes = make([]string, 0, replFactor)
   268  		for { // select shard
   269  			node := it.Next()
   270  			if len(nodeSet) == len(nodes) { // this is a new round
   271  				for k := range nodeSet {
   272  					delete(nodeSet, k)
   273  				}
   274  			}
   275  			if !nodeSet[node] {
   276  				nodeSet[node] = true
   277  				shard.BelongsToNodes = append(shard.BelongsToNodes, node)
   278  				break
   279  			}
   280  		}
   281  
   282  		for i := replFactor; i > 1; i-- {
   283  			shard.BelongsToNodes = append(shard.BelongsToNodes, it.Next())
   284  		}
   285  
   286  		s.Physical[name] = shard
   287  	}
   288  
   289  	return nil
   290  }
   291  
   292  // GetPartitions based on the specified shards, available nodes, and replFactor
   293  // It doesn't change the internal state
   294  func (s *State) GetPartitions(lookUp nodes, shards []string, replFactor int64) (map[string][]string, error) {
   295  	nodes := lookUp.Candidates()
   296  	if len(nodes) == 0 {
   297  		return nil, fmt.Errorf("list of node candidates is empty")
   298  	}
   299  	if f, n := replFactor, len(nodes); f > int64(n) {
   300  		return nil, fmt.Errorf("not enough replicas: found %d want %d", n, f)
   301  	}
   302  	it, err := cluster.NewNodeIterator(nodes, cluster.StartAfter)
   303  	if err != nil {
   304  		return nil, err
   305  	}
   306  	it.SetStartNode(nodes[len(nodes)-1])
   307  	partitions := make(map[string][]string, len(shards))
   308  	nodeSet := make(map[string]bool)
   309  	for _, name := range shards {
   310  		if _, alreadyExists := s.Physical[name]; alreadyExists {
   311  			continue
   312  		}
   313  		owners := make([]string, 0, replFactor)
   314  		for { // select shard
   315  			node := it.Next()
   316  			if len(nodeSet) == len(nodes) { // this is a new round
   317  				for k := range nodeSet {
   318  					delete(nodeSet, k)
   319  				}
   320  			}
   321  			if !nodeSet[node] {
   322  				nodeSet[node] = true
   323  				owners = append(owners, node)
   324  				break
   325  			}
   326  		}
   327  
   328  		for i := replFactor; i > 1; i-- {
   329  			owners = append(owners, it.Next())
   330  		}
   331  
   332  		partitions[name] = owners
   333  	}
   334  
   335  	return partitions, nil
   336  }
   337  
   338  // AddPartition to physical shards
   339  func (s *State) AddPartition(name string, nodes []string, status string) Physical {
   340  	p := Physical{
   341  		Name:           name,
   342  		BelongsToNodes: nodes,
   343  		OwnsPercentage: 1.0,
   344  		Status:         status,
   345  	}
   346  	s.Physical[name] = p
   347  	return p
   348  }
   349  
// DeletePartition removes the physical shard with the given name. It is a
// no-op if no such shard exists.
func (s *State) DeletePartition(name string) {
	delete(s.Physical, name)
}
   354  
   355  // ApplyNodeMapping replaces node names with their new value form nodeMapping in s.
   356  // If s.LegacyBelongsToNodeForBackwardCompat is non empty, it will also perform node name replacement if present in nodeMapping.
   357  func (s *State) ApplyNodeMapping(nodeMapping map[string]string) {
   358  	if len(nodeMapping) == 0 {
   359  		return
   360  	}
   361  
   362  	for k, v := range s.Physical {
   363  		if v.LegacyBelongsToNodeForBackwardCompat != "" {
   364  			if newNodeName, ok := nodeMapping[v.LegacyBelongsToNodeForBackwardCompat]; ok {
   365  				v.LegacyBelongsToNodeForBackwardCompat = newNodeName
   366  			}
   367  		}
   368  
   369  		for i, nodeName := range v.BelongsToNodes {
   370  			if newNodeName, ok := nodeMapping[nodeName]; ok {
   371  				v.BelongsToNodes[i] = newNodeName
   372  			}
   373  		}
   374  
   375  		s.Physical[k] = v
   376  	}
   377  }
   378  
   379  func (s *State) initVirtual() {
   380  	count := s.Config.DesiredVirtualCount
   381  	s.Virtual = make([]Virtual, count)
   382  
   383  	for i := range s.Virtual {
   384  		name := generateShardName()
   385  		h := murmur3.New64()
   386  		h.Write([]byte(name))
   387  		s.Virtual[i] = Virtual{Name: name, Upper: h.Sum64()}
   388  	}
   389  
   390  	sort.Slice(s.Virtual, func(a, b int) bool {
   391  		return s.Virtual[a].Upper < s.Virtual[b].Upper
   392  	})
   393  
   394  	for i := range s.Virtual {
   395  		var tokenCount uint64
   396  		if i == 0 {
   397  			tokenCount = s.Virtual[0].Upper + (math.MaxUint64 - s.Virtual[len(s.Virtual)-1].Upper)
   398  		} else {
   399  			tokenCount = s.Virtual[i].Upper - s.Virtual[i-1].Upper
   400  		}
   401  		s.Virtual[i].OwnsPercentage = float64(tokenCount) / float64(math.MaxUint64)
   402  
   403  	}
   404  }
   405  
   406  // this is a primitive distribution that only works for initializing. Once we
   407  // want to support dynamic sharding, we need to come up with something better
   408  // than this
   409  func (s *State) distributeVirtualAmongPhysical() {
   410  	ids := make([]string, len(s.Virtual))
   411  	for i, v := range s.Virtual {
   412  		ids[i] = v.Name
   413  	}
   414  
   415  	rand.Shuffle(len(s.Virtual), func(a, b int) {
   416  		ids[a], ids[b] = ids[b], ids[a]
   417  	})
   418  
   419  	physicalIDs := make([]string, 0, len(s.Physical))
   420  	for name := range s.Physical {
   421  		physicalIDs = append(physicalIDs, name)
   422  	}
   423  
   424  	for i, vid := range ids {
   425  		pickedPhysical := physicalIDs[i%len(physicalIDs)]
   426  
   427  		virtual := s.virtualByName(vid)
   428  		virtual.AssignedToPhysical = pickedPhysical
   429  		physical := s.Physical[pickedPhysical]
   430  		physical.OwnsVirtual = append(physical.OwnsVirtual, vid)
   431  		physical.OwnsPercentage += virtual.OwnsPercentage
   432  		s.Physical[pickedPhysical] = physical
   433  	}
   434  }
   435  
   436  // uses linear search, but should only be used during shard init and update
   437  // operations, not in regular
   438  func (s *State) virtualByName(name string) *Virtual {
   439  	for i := range s.Virtual {
   440  		if s.Virtual[i].Name == name {
   441  			return &s.Virtual[i]
   442  		}
   443  	}
   444  
   445  	return nil
   446  }
   447  
   448  func (s *State) virtualByToken(token uint64) *Virtual {
   449  	for i := range s.Virtual {
   450  		if token > s.Virtual[i].Upper {
   451  			continue
   452  		}
   453  
   454  		return &s.Virtual[i]
   455  	}
   456  
   457  	return &s.Virtual[0]
   458  }
   459  
   460  const shardNameChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
   461  
   462  func generateShardName() string {
   463  	b := make([]byte, shardNameLength)
   464  	for i := range b {
   465  		b[i] = shardNameChars[rand.Intn(len(shardNameChars))]
   466  	}
   467  
   468  	return string(b)
   469  }
   470  
   471  func (s State) DeepCopy() State {
   472  	var virtualCopy []Virtual
   473  
   474  	physicalCopy := make(map[string]Physical, len(s.Physical))
   475  	for name, shard := range s.Physical {
   476  		physicalCopy[name] = shard.DeepCopy()
   477  	}
   478  
   479  	if len(s.Virtual) > 0 {
   480  		virtualCopy = make([]Virtual, len(s.Virtual))
   481  	}
   482  	for i, virtual := range s.Virtual {
   483  		virtualCopy[i] = virtual.DeepCopy()
   484  	}
   485  
   486  	return State{
   487  		localNodeName:       s.localNodeName,
   488  		IndexID:             s.IndexID,
   489  		Config:              s.Config.DeepCopy(),
   490  		Physical:            physicalCopy,
   491  		Virtual:             virtualCopy,
   492  		PartitioningEnabled: s.PartitioningEnabled,
   493  	}
   494  }
   495  
   496  func (c Config) DeepCopy() Config {
   497  	return Config{
   498  		VirtualPerPhysical:  c.VirtualPerPhysical,
   499  		DesiredCount:        c.DesiredCount,
   500  		ActualCount:         c.ActualCount,
   501  		DesiredVirtualCount: c.DesiredVirtualCount,
   502  		ActualVirtualCount:  c.ActualVirtualCount,
   503  		Key:                 c.Key,
   504  		Strategy:            c.Strategy,
   505  		Function:            c.Function,
   506  	}
   507  }
   508  
   509  func (p Physical) DeepCopy() Physical {
   510  	var ownsVirtualCopy []string
   511  	if len(p.OwnsVirtual) > 0 {
   512  		ownsVirtualCopy = make([]string, len(p.OwnsVirtual))
   513  		copy(ownsVirtualCopy, p.OwnsVirtual)
   514  	}
   515  
   516  	belongsCopy := make([]string, len(p.BelongsToNodes))
   517  	copy(belongsCopy, p.BelongsToNodes)
   518  
   519  	return Physical{
   520  		Name:           p.Name,
   521  		OwnsVirtual:    ownsVirtualCopy,
   522  		OwnsPercentage: p.OwnsPercentage,
   523  		BelongsToNodes: belongsCopy,
   524  		Status:         p.Status,
   525  	}
   526  }
   527  
   528  func (v Virtual) DeepCopy() Virtual {
   529  	return Virtual{
   530  		Name:               v.Name,
   531  		Upper:              v.Upper,
   532  		OwnsPercentage:     v.OwnsPercentage,
   533  		AssignedToPhysical: v.AssignedToPhysical,
   534  	}
   535  }