github.com/hoveychen/kafka-go@v0.4.42/groupbalancer.go

package kafka

import (
	"sort"
)

// GroupMember describes a single participant in a consumer group.
type GroupMember struct {
	// ID is the unique ID for this member as taken from the JoinGroup response.
	ID string

	// Topics is a list of topics that this member is consuming.
	Topics []string

	// UserData contains any information that the GroupBalancer sent to the
	// consumer group coordinator.
	UserData []byte
}

// GroupMemberAssignments holds MemberID => topic => partitions.
type GroupMemberAssignments map[string]map[string][]int

// GroupBalancer encapsulates the client side rebalancing logic.
type GroupBalancer interface {
	// ProtocolName of the GroupBalancer
	ProtocolName() string

	// UserData provides the GroupBalancer an opportunity to embed custom
	// UserData into the metadata.
	//
	// Will be used by JoinGroup to begin the consumer group handshake.
	//
	// See https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-JoinGroupRequest
	UserData() ([]byte, error)

	// AssignGroups returns which members will be consuming
	// which topic partitions.
	AssignGroups(members []GroupMember, partitions []Partition) GroupMemberAssignments
}

// RangeGroupBalancer assigns each consumer a contiguous range of partitions.
//
// Example: 5 partitions, 2 consumers
//	C0: [0, 1]
//	C1: [2, 3, 4]
//
// Example: 6 partitions, 3 consumers
//	C0: [0, 1]
//	C1: [2, 3]
//	C2: [4, 5]
type RangeGroupBalancer struct{}

func (r RangeGroupBalancer) ProtocolName() string {
	return "range"
}

func (r RangeGroupBalancer) UserData() ([]byte, error) {
	return nil, nil
}

func (r RangeGroupBalancer) AssignGroups(members []GroupMember, topicPartitions []Partition) GroupMemberAssignments {
	groupAssignments := GroupMemberAssignments{}
	membersByTopic := findMembersByTopic(members)

	for topic, members := range membersByTopic {
		partitions := findPartitions(topic, topicPartitions)
		partitionCount := len(partitions)
		memberCount := len(members)

		for memberIndex, member := range members {
			assignmentsByTopic, ok := groupAssignments[member.ID]
			if !ok {
				assignmentsByTopic = map[string][]int{}
				groupAssignments[member.ID] = assignmentsByTopic
			}

			// Each member receives the partitions whose index falls in the
			// contiguous range [minIndex, maxIndex).
			minIndex := memberIndex * partitionCount / memberCount
			maxIndex := (memberIndex + 1) * partitionCount / memberCount

			for partitionIndex, partition := range partitions {
				if partitionIndex >= minIndex && partitionIndex < maxIndex {
					assignmentsByTopic[topic] = append(assignmentsByTopic[topic], partition)
				}
			}
		}
	}

	return groupAssignments
}
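// exampleRangeAssignment is an illustrative sketch, not part of the original
// file: it shows the assignments RangeGroupBalancer produces for the
// 5-partition / 2-consumer case in the doc comment above. The member IDs and
// the topic name are assumptions made up for the example.
func exampleRangeAssignment() GroupMemberAssignments {
	members := []GroupMember{
		{ID: "C0", Topics: []string{"example-topic"}},
		{ID: "C1", Topics: []string{"example-topic"}},
	}
	partitions := make([]Partition, 5)
	for i := range partitions {
		partitions[i] = Partition{Topic: "example-topic", ID: i}
	}
	// Expected result: C0 => {"example-topic": [0 1]}, C1 => {"example-topic": [2 3 4]}.
	return RangeGroupBalancer{}.AssignGroups(members, partitions)
}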
// RoundRobinGroupBalancer divides partitions evenly among consumers in a
// round-robin fashion.
//
// Example: 5 partitions, 2 consumers
//	C0: [0, 2, 4]
//	C1: [1, 3]
//
// Example: 6 partitions, 3 consumers
//	C0: [0, 3]
//	C1: [1, 4]
//	C2: [2, 5]
type RoundRobinGroupBalancer struct{}

func (r RoundRobinGroupBalancer) ProtocolName() string {
	return "roundrobin"
}

func (r RoundRobinGroupBalancer) UserData() ([]byte, error) {
	return nil, nil
}

func (r RoundRobinGroupBalancer) AssignGroups(members []GroupMember, topicPartitions []Partition) GroupMemberAssignments {
	groupAssignments := GroupMemberAssignments{}
	membersByTopic := findMembersByTopic(members)
	for topic, members := range membersByTopic {
		partitionIDs := findPartitions(topic, topicPartitions)
		memberCount := len(members)

		for memberIndex, member := range members {
			assignmentsByTopic, ok := groupAssignments[member.ID]
			if !ok {
				assignmentsByTopic = map[string][]int{}
				groupAssignments[member.ID] = assignmentsByTopic
			}

			// The partition at index i goes to the member whose index equals
			// i modulo the member count.
			for partitionIndex, partition := range partitionIDs {
				if (partitionIndex % memberCount) == memberIndex {
					assignmentsByTopic[topic] = append(assignmentsByTopic[topic], partition)
				}
			}
		}
	}

	return groupAssignments
}
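// exampleRoundRobinAssignment is an illustrative sketch, not part of the
// original file: it walks through the 5-partition / 2-consumer case from the
// doc comment above. The member IDs and the topic name are assumptions made
// up for the example.
func exampleRoundRobinAssignment() GroupMemberAssignments {
	members := []GroupMember{
		{ID: "C0", Topics: []string{"example-topic"}},
		{ID: "C1", Topics: []string{"example-topic"}},
	}
	partitions := make([]Partition, 5)
	for i := range partitions {
		partitions[i] = Partition{Topic: "example-topic", ID: i}
	}
	// Expected result: C0 => {"example-topic": [0 2 4]}, C1 => {"example-topic": [1 3]}.
	return RoundRobinGroupBalancer{}.AssignGroups(members, partitions)
}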
// RackAffinityGroupBalancer makes a best effort to pair up consumers with
// partitions whose leader is in the same rack. This strategy can have
// performance benefits by minimizing round trip latency between the consumer
// and the broker. In environments where network traffic across racks incurs
// charges (such as cross AZ data transfer in AWS), this strategy is also a cost
// optimization measure because it keeps network traffic within the local rack
// where possible.
//
// The primary objective is to spread partitions evenly across consumers with a
// secondary focus on maximizing the number of partitions where the leader and
// the consumer are in the same rack. For best affinity, it's recommended to
// have a balanced spread of consumers and partition leaders across racks.
//
// This balancer requires Kafka version 0.10.0.0 or later. Earlier versions do
// not return the brokers' racks in the metadata request.
type RackAffinityGroupBalancer struct {
	// Rack is the name of the rack where this consumer is running. It will be
	// communicated to the consumer group leader via the UserData so that
	// assignments can be made with affinity to the partition leader.
	Rack string
}

func (r RackAffinityGroupBalancer) ProtocolName() string {
	return "rack-affinity"
}

func (r RackAffinityGroupBalancer) AssignGroups(members []GroupMember, partitions []Partition) GroupMemberAssignments {
	membersByTopic := make(map[string][]GroupMember)
	for _, m := range members {
		for _, t := range m.Topics {
			membersByTopic[t] = append(membersByTopic[t], m)
		}
	}

	partitionsByTopic := make(map[string][]Partition)
	for _, p := range partitions {
		partitionsByTopic[p.Topic] = append(partitionsByTopic[p.Topic], p)
	}

	assignments := GroupMemberAssignments{}
	for topic := range membersByTopic {
		topicAssignments := r.assignTopic(membersByTopic[topic], partitionsByTopic[topic])
		for member, parts := range topicAssignments {
			memberAssignments, ok := assignments[member]
			if !ok {
				memberAssignments = make(map[string][]int)
				assignments[member] = memberAssignments
			}
			memberAssignments[topic] = parts
		}
	}
	return assignments
}

func (r RackAffinityGroupBalancer) UserData() ([]byte, error) {
	return []byte(r.Rack), nil
}
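// exampleRackAffinityAssignment is an illustrative sketch, not part of the
// original file: with one consumer and one partition leader per rack, each
// consumer is expected to receive the partition whose leader shares its rack.
// The rack names, member IDs, broker IDs, and topic name are assumptions made
// up for the example; each member's UserData carries its rack name, as
// UserData above would report it.
func exampleRackAffinityAssignment() GroupMemberAssignments {
	members := []GroupMember{
		{ID: "consumer-a", Topics: []string{"example-topic"}, UserData: []byte("rack-1")},
		{ID: "consumer-b", Topics: []string{"example-topic"}, UserData: []byte("rack-2")},
	}
	partitions := []Partition{
		{Topic: "example-topic", ID: 0, Leader: Broker{ID: 1, Rack: "rack-1"}},
		{Topic: "example-topic", ID: 1, Leader: Broker{ID: 2, Rack: "rack-2"}},
	}
	// Expected result: consumer-a => {"example-topic": [0]},
	// consumer-b => {"example-topic": [1]}.
	return RackAffinityGroupBalancer{}.AssignGroups(members, partitions)
}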
func (r *RackAffinityGroupBalancer) assignTopic(members []GroupMember, partitions []Partition) map[string][]int {
	zonedPartitions := make(map[string][]int)
	for _, part := range partitions {
		zone := part.Leader.Rack
		zonedPartitions[zone] = append(zonedPartitions[zone], part.ID)
	}

	zonedConsumers := make(map[string][]string)
	for _, member := range members {
		zone := string(member.UserData)
		zonedConsumers[zone] = append(zonedConsumers[zone], member.ID)
	}

	targetPerMember := len(partitions) / len(members)
	remainder := len(partitions) % len(members)
	assignments := make(map[string][]int)

	// assign as many as possible in zone. this will assign up to partsPerMember
	// to each consumer. it will also prefer to allocate remainder partitions
	// in zone if possible.
	for zone, parts := range zonedPartitions {
		consumers := zonedConsumers[zone]
		if len(consumers) == 0 {
			continue
		}

		// don't over-allocate. cap partition assignments at the calculated
		// target.
		partsPerMember := len(parts) / len(consumers)
		if partsPerMember > targetPerMember {
			partsPerMember = targetPerMember
		}

		for _, consumer := range consumers {
			assignments[consumer] = append(assignments[consumer], parts[:partsPerMember]...)
			parts = parts[partsPerMember:]
		}

		// if we had enough partitions for each consumer in this zone to hit its
		// target, attempt to use any leftover partitions to satisfy the total
		// remainder by adding at most 1 partition per consumer.
		leftover := len(parts)
		if partsPerMember == targetPerMember {
			if leftover > remainder {
				leftover = remainder
			}
			if leftover > len(consumers) {
				leftover = len(consumers)
			}
			remainder -= leftover
		}

		// this loop covers the case where we're assigning extra partitions or
		// if there weren't enough to satisfy the targetPerMember and the zoned
		// partitions didn't divide evenly.
		for i := 0; i < leftover; i++ {
			assignments[consumers[i]] = append(assignments[consumers[i]], parts[i])
		}
		parts = parts[leftover:]

		if len(parts) == 0 {
			delete(zonedPartitions, zone)
		} else {
			zonedPartitions[zone] = parts
		}
	}

	// assign out remainders regardless of zone.
	var remaining []int
	for _, partitions := range zonedPartitions {
		remaining = append(remaining, partitions...)
	}

	for _, member := range members {
		assigned := assignments[member.ID]
		delta := targetPerMember - len(assigned)
		// if it were possible to assign the remainder in zone, it's been taken
		// care of already. now we will portion out any remainder to a member
		// that can take it.
		if delta >= 0 && remainder > 0 {
			delta++
			remainder--
		}
		if delta > 0 {
			assignments[member.ID] = append(assigned, remaining[:delta]...)
			remaining = remaining[delta:]
		}
	}

	return assignments
}

// findPartitions extracts the partition ids associated with the topic from the
// list of Partitions provided.
func findPartitions(topic string, partitions []Partition) []int {
	var ids []int
	for _, partition := range partitions {
		if partition.Topic == topic {
			ids = append(ids, partition.ID)
		}
	}
	return ids
}

// findMembersByTopic groups the GroupMembers by the topics they consume.
func findMembersByTopic(members []GroupMember) map[string][]GroupMember {
	membersByTopic := map[string][]GroupMember{}
	for _, member := range members {
		for _, topic := range member.Topics {
			membersByTopic[topic] = append(membersByTopic[topic], member)
		}
	}

	// normalize ordering of members to enable grouping across topics by partitions
	//
	// Want:
	//	C0 [T0/P0, T1/P0]
	//	C1 [T0/P1, T1/P1]
	//
	// Not:
	//	C0 [T0/P0, T1/P1]
	//	C1 [T0/P1, T1/P0]
	//
	// Even though the latter is still round robin, the partitions are crossed
	for _, members := range membersByTopic {
		sort.Slice(members, func(i, j int) bool {
			return members[i].ID < members[j].ID
		})
	}

	return membersByTopic
}

// findGroupBalancer returns the GroupBalancer with the specified protocolName
// from the slice provided.
func findGroupBalancer(protocolName string, balancers []GroupBalancer) (GroupBalancer, bool) {
	for _, balancer := range balancers {
		if balancer.ProtocolName() == protocolName {
			return balancer, true
		}
	}
	return nil, false
}
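// exampleFindGroupBalancer is an illustrative sketch, not part of the original
// file: it shows how findGroupBalancer picks the balancer whose ProtocolName
// matches the protocol negotiated for the group. The rack name is an
// assumption made up for the example.
func exampleFindGroupBalancer() {
	balancers := []GroupBalancer{
		RangeGroupBalancer{},
		RoundRobinGroupBalancer{},
		RackAffinityGroupBalancer{Rack: "rack-1"},
	}
	if balancer, ok := findGroupBalancer("roundrobin", balancers); ok {
		// balancer is the RoundRobinGroupBalancer from the slice above.
		_ = balancer
	}
}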