github.com/weaviate/weaviate@v1.24.6/usecases/sharding/state.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package sharding 13 14 import ( 15 "fmt" 16 "math" 17 "math/rand" 18 "sort" 19 20 "github.com/spaolacci/murmur3" 21 "github.com/weaviate/weaviate/entities/schema" 22 "github.com/weaviate/weaviate/usecases/cluster" 23 ) 24 25 const shardNameLength = 12 26 27 type State struct { 28 IndexID string `json:"indexID"` // for monitoring, reporting purposes. Does not influence the shard-calculations 29 Config Config `json:"config"` 30 Physical map[string]Physical `json:"physical"` 31 Virtual []Virtual `json:"virtual"` 32 PartitioningEnabled bool `json:"partitioningEnabled"` 33 34 // different for each node, not to be serialized 35 localNodeName string // TODO: localNodeName is static it is better to store just once 36 } 37 38 // MigrateFromOldFormat checks if the old (pre-v1.17) format was used and 39 // migrates it into the new format for backward-compatibility with all classes 40 // created before v1.17 41 func (s *State) MigrateFromOldFormat() { 42 for shardName, shard := range s.Physical { 43 if shard.LegacyBelongsToNodeForBackwardCompat != "" && len(shard.BelongsToNodes) == 0 { 44 shard.BelongsToNodes = []string{ 45 shard.LegacyBelongsToNodeForBackwardCompat, 46 } 47 shard.LegacyBelongsToNodeForBackwardCompat = "" 48 } 49 s.Physical[shardName] = shard 50 } 51 } 52 53 type Virtual struct { 54 Name string `json:"name"` 55 Upper uint64 `json:"upper"` 56 OwnsPercentage float64 `json:"ownsPercentage"` 57 AssignedToPhysical string `json:"assignedToPhysical"` 58 } 59 60 type Physical struct { 61 Name string `json:"name"` 62 OwnsVirtual []string `json:"ownsVirtual,omitempty"` 63 OwnsPercentage float64 `json:"ownsPercentage"` 64 65 LegacyBelongsToNodeForBackwardCompat string `json:"belongsToNode,omitempty"` 66 BelongsToNodes []string `json:"belongsToNodes,omitempty"` 67 68 Status string `json:"status,omitempty"` 69 } 70 71 // BelongsToNode for backward-compatibility when there was no replication. It 72 // always returns the first node of the list 73 func (p Physical) BelongsToNode() string { 74 return p.BelongsToNodes[0] 75 } 76 77 // AdjustReplicas shrinks or extends the replica set (p.BelongsToNodes) 78 func (p *Physical) AdjustReplicas(count int, nodes nodes) error { 79 if count < 0 { 80 return fmt.Errorf("negative replication factor: %d", count) 81 } 82 // let's be defensive here and make sure available replicas are unique. 83 available := make(map[string]bool) 84 for _, n := range p.BelongsToNodes { 85 available[n] = true 86 } 87 // a == b should be always true except in case of bug 88 if b, a := len(p.BelongsToNodes), len(available); b > a { 89 p.BelongsToNodes = p.BelongsToNodes[:a] 90 i := 0 91 for n := range available { 92 p.BelongsToNodes[i] = n 93 i++ 94 } 95 } 96 if count < len(p.BelongsToNodes) { // less replicas wanted 97 p.BelongsToNodes = p.BelongsToNodes[:count] 98 return nil 99 } 100 101 names := nodes.Candidates() 102 if count > len(names) { 103 return fmt.Errorf("not enough replicas: found %d want %d", len(names), count) 104 } 105 106 // make sure included nodes are unique 107 for _, n := range names { 108 if !available[n] { 109 p.BelongsToNodes = append(p.BelongsToNodes, n) 110 available[n] = true 111 } 112 if len(available) == count { 113 break 114 } 115 } 116 117 return nil 118 } 119 120 func (p *Physical) ActivityStatus() string { 121 return schema.ActivityStatus(p.Status) 122 } 123 124 type nodes interface { 125 Candidates() []string 126 LocalName() string 127 } 128 129 func InitState(id string, config Config, nodes nodes, replFactor int64, partitioningEnabled bool) (*State, error) { 130 out := &State{ 131 Config: config, 132 IndexID: id, 133 localNodeName: nodes.LocalName(), 134 PartitioningEnabled: partitioningEnabled, 135 } 136 if partitioningEnabled { 137 out.Physical = make(map[string]Physical, 128) 138 return out, nil 139 } 140 141 names := nodes.Candidates() 142 if f, n := replFactor, len(names); f > int64(n) { 143 return nil, fmt.Errorf("not enough replicas: found %d want %d", n, f) 144 } 145 146 if err := out.initPhysical(names, replFactor); err != nil { 147 return nil, err 148 } 149 out.initVirtual() 150 out.distributeVirtualAmongPhysical() 151 152 return out, nil 153 } 154 155 // Shard returns the shard name if it exits and empty string otherwise 156 func (s *State) Shard(partitionKey, objectID string) string { 157 if s.PartitioningEnabled { 158 if _, ok := s.Physical[partitionKey]; ok { 159 return partitionKey // will change in the future 160 } 161 return "" 162 } 163 return s.PhysicalShard([]byte(objectID)) 164 } 165 166 func (s *State) PhysicalShard(in []byte) string { 167 if len(s.Physical) == 0 { 168 panic("no physical shards present") 169 } 170 171 if len(s.Virtual) == 0 { 172 panic("no virtual shards present") 173 } 174 175 h := murmur3.New64() 176 h.Write(in) 177 token := h.Sum64() 178 179 virtual := s.virtualByToken(token) 180 181 return virtual.AssignedToPhysical 182 } 183 184 // CountPhysicalShards return a count of physical shards 185 func (s *State) CountPhysicalShards() int { 186 return len(s.Physical) 187 } 188 189 func (s *State) AllPhysicalShards() []string { 190 var names []string 191 for _, physical := range s.Physical { 192 names = append(names, physical.Name) 193 } 194 195 sort.Slice(names, func(a, b int) bool { 196 return names[a] < names[b] 197 }) 198 199 return names 200 } 201 202 func (s *State) AllLocalPhysicalShards() []string { 203 var names []string 204 for _, physical := range s.Physical { 205 if s.IsLocalShard(physical.Name) { 206 names = append(names, physical.Name) 207 } 208 } 209 210 sort.Slice(names, func(a, b int) bool { 211 return names[a] < names[b] 212 }) 213 214 return names 215 } 216 217 func (s *State) SetLocalName(name string) { 218 s.localNodeName = name 219 } 220 221 func (s *State) IsLocalShard(name string) bool { 222 for _, node := range s.Physical[name].BelongsToNodes { 223 if node == s.localNodeName { 224 return true 225 } 226 } 227 228 return false 229 } 230 231 // initPhysical assigns shards to nodes according to the following rules: 232 // 233 // - The starting point of the ring is random 234 // - Shard N+1's first node is the right neighbor of shard N's first node 235 // - If a shard has multiple nodes (replication) they are always the right 236 // neighbors of the first node of that shard 237 // 238 // Example with 3 nodes, 2 shards, replicationFactor=2: 239 // 240 // Shard 1: Node1, Node2 241 // Shard 2: Node2, Node3 242 // 243 // Example with 3 nodes, 3 shards, replicationFactor=3: 244 // 245 // Shard 1: Node1, Node2, Node3 246 // Shard 2: Node2, Node3, Node1 247 // Shard 3: Node3, Node1, Node2 248 // 249 // Example with 12 nodes, 3 shards, replicationFactor=5: 250 // 251 // Shard 1: Node7, Node8, Node9, Node10, Node 11 252 // Shard 2: Node8, Node9, Node10, Node 11, Node 12 253 // Shard 3: Node9, Node10, Node11, Node 12, Node 1 254 func (s *State) initPhysical(nodes []string, replFactor int64) error { 255 it, err := cluster.NewNodeIterator(nodes, cluster.StartAfter) 256 if err != nil { 257 return err 258 } 259 it.SetStartNode(nodes[len(nodes)-1]) 260 261 s.Physical = map[string]Physical{} 262 263 nodeSet := make(map[string]bool) 264 for i := 0; i < s.Config.DesiredCount; i++ { 265 name := generateShardName() 266 shard := Physical{Name: name} 267 shard.BelongsToNodes = make([]string, 0, replFactor) 268 for { // select shard 269 node := it.Next() 270 if len(nodeSet) == len(nodes) { // this is a new round 271 for k := range nodeSet { 272 delete(nodeSet, k) 273 } 274 } 275 if !nodeSet[node] { 276 nodeSet[node] = true 277 shard.BelongsToNodes = append(shard.BelongsToNodes, node) 278 break 279 } 280 } 281 282 for i := replFactor; i > 1; i-- { 283 shard.BelongsToNodes = append(shard.BelongsToNodes, it.Next()) 284 } 285 286 s.Physical[name] = shard 287 } 288 289 return nil 290 } 291 292 // GetPartitions based on the specified shards, available nodes, and replFactor 293 // It doesn't change the internal state 294 func (s *State) GetPartitions(lookUp nodes, shards []string, replFactor int64) (map[string][]string, error) { 295 nodes := lookUp.Candidates() 296 if len(nodes) == 0 { 297 return nil, fmt.Errorf("list of node candidates is empty") 298 } 299 if f, n := replFactor, len(nodes); f > int64(n) { 300 return nil, fmt.Errorf("not enough replicas: found %d want %d", n, f) 301 } 302 it, err := cluster.NewNodeIterator(nodes, cluster.StartAfter) 303 if err != nil { 304 return nil, err 305 } 306 it.SetStartNode(nodes[len(nodes)-1]) 307 partitions := make(map[string][]string, len(shards)) 308 nodeSet := make(map[string]bool) 309 for _, name := range shards { 310 if _, alreadyExists := s.Physical[name]; alreadyExists { 311 continue 312 } 313 owners := make([]string, 0, replFactor) 314 for { // select shard 315 node := it.Next() 316 if len(nodeSet) == len(nodes) { // this is a new round 317 for k := range nodeSet { 318 delete(nodeSet, k) 319 } 320 } 321 if !nodeSet[node] { 322 nodeSet[node] = true 323 owners = append(owners, node) 324 break 325 } 326 } 327 328 for i := replFactor; i > 1; i-- { 329 owners = append(owners, it.Next()) 330 } 331 332 partitions[name] = owners 333 } 334 335 return partitions, nil 336 } 337 338 // AddPartition to physical shards 339 func (s *State) AddPartition(name string, nodes []string, status string) Physical { 340 p := Physical{ 341 Name: name, 342 BelongsToNodes: nodes, 343 OwnsPercentage: 1.0, 344 Status: status, 345 } 346 s.Physical[name] = p 347 return p 348 } 349 350 // DeletePartition to physical shards 351 func (s *State) DeletePartition(name string) { 352 delete(s.Physical, name) 353 } 354 355 // ApplyNodeMapping replaces node names with their new value form nodeMapping in s. 356 // If s.LegacyBelongsToNodeForBackwardCompat is non empty, it will also perform node name replacement if present in nodeMapping. 357 func (s *State) ApplyNodeMapping(nodeMapping map[string]string) { 358 if len(nodeMapping) == 0 { 359 return 360 } 361 362 for k, v := range s.Physical { 363 if v.LegacyBelongsToNodeForBackwardCompat != "" { 364 if newNodeName, ok := nodeMapping[v.LegacyBelongsToNodeForBackwardCompat]; ok { 365 v.LegacyBelongsToNodeForBackwardCompat = newNodeName 366 } 367 } 368 369 for i, nodeName := range v.BelongsToNodes { 370 if newNodeName, ok := nodeMapping[nodeName]; ok { 371 v.BelongsToNodes[i] = newNodeName 372 } 373 } 374 375 s.Physical[k] = v 376 } 377 } 378 379 func (s *State) initVirtual() { 380 count := s.Config.DesiredVirtualCount 381 s.Virtual = make([]Virtual, count) 382 383 for i := range s.Virtual { 384 name := generateShardName() 385 h := murmur3.New64() 386 h.Write([]byte(name)) 387 s.Virtual[i] = Virtual{Name: name, Upper: h.Sum64()} 388 } 389 390 sort.Slice(s.Virtual, func(a, b int) bool { 391 return s.Virtual[a].Upper < s.Virtual[b].Upper 392 }) 393 394 for i := range s.Virtual { 395 var tokenCount uint64 396 if i == 0 { 397 tokenCount = s.Virtual[0].Upper + (math.MaxUint64 - s.Virtual[len(s.Virtual)-1].Upper) 398 } else { 399 tokenCount = s.Virtual[i].Upper - s.Virtual[i-1].Upper 400 } 401 s.Virtual[i].OwnsPercentage = float64(tokenCount) / float64(math.MaxUint64) 402 403 } 404 } 405 406 // this is a primitive distribution that only works for initializing. Once we 407 // want to support dynamic sharding, we need to come up with something better 408 // than this 409 func (s *State) distributeVirtualAmongPhysical() { 410 ids := make([]string, len(s.Virtual)) 411 for i, v := range s.Virtual { 412 ids[i] = v.Name 413 } 414 415 rand.Shuffle(len(s.Virtual), func(a, b int) { 416 ids[a], ids[b] = ids[b], ids[a] 417 }) 418 419 physicalIDs := make([]string, 0, len(s.Physical)) 420 for name := range s.Physical { 421 physicalIDs = append(physicalIDs, name) 422 } 423 424 for i, vid := range ids { 425 pickedPhysical := physicalIDs[i%len(physicalIDs)] 426 427 virtual := s.virtualByName(vid) 428 virtual.AssignedToPhysical = pickedPhysical 429 physical := s.Physical[pickedPhysical] 430 physical.OwnsVirtual = append(physical.OwnsVirtual, vid) 431 physical.OwnsPercentage += virtual.OwnsPercentage 432 s.Physical[pickedPhysical] = physical 433 } 434 } 435 436 // uses linear search, but should only be used during shard init and update 437 // operations, not in regular 438 func (s *State) virtualByName(name string) *Virtual { 439 for i := range s.Virtual { 440 if s.Virtual[i].Name == name { 441 return &s.Virtual[i] 442 } 443 } 444 445 return nil 446 } 447 448 func (s *State) virtualByToken(token uint64) *Virtual { 449 for i := range s.Virtual { 450 if token > s.Virtual[i].Upper { 451 continue 452 } 453 454 return &s.Virtual[i] 455 } 456 457 return &s.Virtual[0] 458 } 459 460 const shardNameChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" 461 462 func generateShardName() string { 463 b := make([]byte, shardNameLength) 464 for i := range b { 465 b[i] = shardNameChars[rand.Intn(len(shardNameChars))] 466 } 467 468 return string(b) 469 } 470 471 func (s State) DeepCopy() State { 472 var virtualCopy []Virtual 473 474 physicalCopy := make(map[string]Physical, len(s.Physical)) 475 for name, shard := range s.Physical { 476 physicalCopy[name] = shard.DeepCopy() 477 } 478 479 if len(s.Virtual) > 0 { 480 virtualCopy = make([]Virtual, len(s.Virtual)) 481 } 482 for i, virtual := range s.Virtual { 483 virtualCopy[i] = virtual.DeepCopy() 484 } 485 486 return State{ 487 localNodeName: s.localNodeName, 488 IndexID: s.IndexID, 489 Config: s.Config.DeepCopy(), 490 Physical: physicalCopy, 491 Virtual: virtualCopy, 492 PartitioningEnabled: s.PartitioningEnabled, 493 } 494 } 495 496 func (c Config) DeepCopy() Config { 497 return Config{ 498 VirtualPerPhysical: c.VirtualPerPhysical, 499 DesiredCount: c.DesiredCount, 500 ActualCount: c.ActualCount, 501 DesiredVirtualCount: c.DesiredVirtualCount, 502 ActualVirtualCount: c.ActualVirtualCount, 503 Key: c.Key, 504 Strategy: c.Strategy, 505 Function: c.Function, 506 } 507 } 508 509 func (p Physical) DeepCopy() Physical { 510 var ownsVirtualCopy []string 511 if len(p.OwnsVirtual) > 0 { 512 ownsVirtualCopy = make([]string, len(p.OwnsVirtual)) 513 copy(ownsVirtualCopy, p.OwnsVirtual) 514 } 515 516 belongsCopy := make([]string, len(p.BelongsToNodes)) 517 copy(belongsCopy, p.BelongsToNodes) 518 519 return Physical{ 520 Name: p.Name, 521 OwnsVirtual: ownsVirtualCopy, 522 OwnsPercentage: p.OwnsPercentage, 523 BelongsToNodes: belongsCopy, 524 Status: p.Status, 525 } 526 } 527 528 func (v Virtual) DeepCopy() Virtual { 529 return Virtual{ 530 Name: v.Name, 531 Upper: v.Upper, 532 OwnsPercentage: v.OwnsPercentage, 533 AssignedToPhysical: v.AssignedToPhysical, 534 } 535 }