github.com/weaviate/weaviate@v1.24.6/usecases/cluster/delegate.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package cluster 13 14 import ( 15 "bytes" 16 "encoding/binary" 17 "fmt" 18 "math/rand" 19 "sort" 20 "sync" 21 "time" 22 23 enterrors "github.com/weaviate/weaviate/entities/errors" 24 25 "github.com/hashicorp/memberlist" 26 "github.com/sirupsen/logrus" 27 ) 28 29 // _OpCode represents the type of supported operation 30 type _OpCode uint8 31 32 const ( 33 // _ProtoVersion internal protocol version for exchanging messages 34 _ProtoVersion uint8 = 1 35 // _OpCodeDisk operation code for getting disk space 36 _OpCodeDisk _OpCode = 1 37 // _ProtoTTL used to decide when to update the cache 38 _ProtoTTL = time.Second * 8 39 ) 40 41 // spaceMsg is used to notify other nodes about current disk usage 42 type spaceMsg struct { 43 header 44 DiskUsage 45 NodeLen uint8 // = len(Node) is required to marshal Node 46 Node string // node space 47 } 48 49 // header of an operation 50 type header struct { 51 // OpCode operation code 52 OpCode _OpCode 53 // ProtoVersion protocol we will speak 54 ProtoVersion uint8 55 } 56 57 // DiskUsage contains total and available space in B 58 type DiskUsage struct { 59 // Total disk space 60 Total uint64 61 // Total available space 62 Available uint64 63 } 64 65 // NodeInfo disk space 66 type NodeInfo struct { 67 DiskUsage 68 LastTimeMilli int64 // last update time in milliseconds 69 } 70 71 func (d *spaceMsg) marshal() (data []byte, err error) { 72 buf := bytes.NewBuffer(make([]byte, 0, 24+len(d.Node))) 73 if err := binary.Write(buf, binary.BigEndian, d.header); err != nil { 74 return nil, err 75 } 76 if err := binary.Write(buf, binary.BigEndian, d.DiskUsage); err != nil { 77 return nil, err 78 } 79 // code node name starting by its length 80 if err := buf.WriteByte(d.NodeLen); err != nil { 81 return nil, err 82 } 83 _, err = buf.Write([]byte(d.Node)) 84 return buf.Bytes(), err 85 } 86 87 func (d *spaceMsg) unmarshal(data []byte) (err error) { 88 rd := bytes.NewReader(data) 89 if err = binary.Read(rd, binary.BigEndian, &d.header); err != nil { 90 return 91 } 92 if err = binary.Read(rd, binary.BigEndian, &d.DiskUsage); err != nil { 93 return 94 } 95 96 // decode node name start by its length 97 if d.NodeLen, err = rd.ReadByte(); err != nil { 98 return 99 } 100 begin := len(data) - rd.Len() 101 end := begin + int(d.NodeLen) 102 // make sure this version is backward compatible 103 if _ProtoVersion <= 1 && begin+int(d.NodeLen) != len(data) { 104 begin-- // since previous version doesn't encode the length 105 end = len(data) 106 d.NodeLen = uint8(end - begin) 107 } 108 d.Node = string(data[begin:end]) 109 return nil 110 } 111 112 // delegate implements the memberList delegate interface 113 type delegate struct { 114 Name string 115 dataPath string 116 log logrus.FieldLogger 117 sync.Mutex 118 Cache map[string]NodeInfo 119 120 mutex sync.Mutex 121 hostInfo NodeInfo 122 } 123 124 func (d *delegate) setOwnSpace(x DiskUsage) { 125 d.mutex.Lock() 126 d.hostInfo = NodeInfo{DiskUsage: x, LastTimeMilli: time.Now().UnixMilli()} 127 d.mutex.Unlock() 128 } 129 130 func (d *delegate) ownInfo() NodeInfo { 131 d.mutex.Lock() 132 defer d.mutex.Unlock() 133 return d.hostInfo 134 } 135 136 // init must be called first to initialize the cache 137 func (d *delegate) init(diskSpace func(path string) (DiskUsage, error)) error { 138 d.Cache = make(map[string]NodeInfo, 32) 139 if diskSpace == nil { 140 return fmt.Errorf("function calculating disk space cannot be empty") 141 } 142 lastTime := time.Now() 143 minUpdatePeriod := time.Second + _ProtoTTL/3 144 space, err := diskSpace(d.dataPath) 145 if err != nil { 146 lastTime = lastTime.Add(-minUpdatePeriod) 147 d.log.Errorf("calculate disk space: %v", err) 148 } 149 150 d.setOwnSpace(space) 151 d.set(d.Name, NodeInfo{space, lastTime.UnixMilli()}) // cache 152 153 // delegate remains alive throughout the entire program. 154 enterrors.GoWrapper(func() { d.updater(_ProtoTTL, minUpdatePeriod, diskSpace) }, d.log) 155 return nil 156 } 157 158 // NodeMeta is used to retrieve meta-data about the current node 159 // when broadcasting an alive message. It's length is limited to 160 // the given byte size. This metadata is available in the Node structure. 161 func (d *delegate) NodeMeta(limit int) (meta []byte) { 162 return nil 163 } 164 165 // LocalState is used for a TCP Push/Pull. This is sent to 166 // the remote side in addition to the membership information. Any 167 // data can be sent here. See MergeRemoteState as well. The `join` 168 // boolean indicates this is for a join instead of a push/pull. 169 func (d *delegate) LocalState(join bool) []byte { 170 var ( 171 info = d.ownInfo() 172 err error 173 ) 174 175 d.set(d.Name, info) // cache new value 176 177 x := spaceMsg{ 178 header{ 179 OpCode: _OpCodeDisk, 180 ProtoVersion: _ProtoVersion, 181 }, 182 info.DiskUsage, 183 uint8(len(d.Name)), 184 d.Name, 185 } 186 bytes, err := x.marshal() 187 if err != nil { 188 d.log.WithField("action", "delegate.local_state.marshal").Error(err) 189 return nil 190 } 191 return bytes 192 } 193 194 // MergeRemoteState is invoked after a TCP Push/Pull. This is the 195 // state received from the remote side and is the result of the 196 // remote side's LocalState call. The 'join' 197 // boolean indicates this is for a join instead of a push/pull. 198 func (d *delegate) MergeRemoteState(data []byte, join bool) { 199 // Does operation match _OpCodeDisk 200 if _OpCode(data[0]) != _OpCodeDisk { 201 return 202 } 203 var x spaceMsg 204 if err := x.unmarshal(data); err != nil || x.Node == "" { 205 d.log.WithField("action", "delegate.merge_remote.unmarshal"). 206 WithField("data", string(data)).Error(err) 207 return 208 } 209 info := NodeInfo{x.DiskUsage, time.Now().UnixMilli()} 210 d.set(x.Node, info) 211 } 212 213 func (d *delegate) NotifyMsg(data []byte) {} 214 215 func (d *delegate) GetBroadcasts(overhead, limit int) [][]byte { 216 return nil 217 } 218 219 // get returns info about about a specific node in the cluster 220 func (d *delegate) get(node string) (NodeInfo, bool) { 221 d.Lock() 222 defer d.Unlock() 223 x, ok := d.Cache[node] 224 return x, ok 225 } 226 227 func (d *delegate) set(node string, x NodeInfo) { 228 d.Lock() 229 defer d.Unlock() 230 d.Cache[node] = x 231 } 232 233 // delete key from the cache 234 func (d *delegate) delete(node string) { 235 d.Lock() 236 defer d.Unlock() 237 delete(d.Cache, node) 238 } 239 240 // sortCandidates by the amount of free space in descending order 241 // 242 // Two nodes are considered equivalent if the difference between their 243 // free spaces is less than 32MB. 244 // The free space is just an rough estimate of the actual amount. 245 // The Lower bound 32MB helps to mitigate the risk of selecting same set of nodes 246 // when selections happens concurrently on different initiator nodes. 247 func (d *delegate) sortCandidates(names []string) []string { 248 rand.Shuffle(len(names), func(i, j int) { names[i], names[j] = names[j], names[i] }) 249 250 d.Lock() 251 defer d.Unlock() 252 m := d.Cache 253 sort.Slice(names, func(i, j int) bool { 254 return (m[names[j]].Available >> 25) < (m[names[i]].Available >> 25) 255 }) 256 257 return names 258 } 259 260 // updater a function which updates node information periodically 261 func (d *delegate) updater(period, minPeriod time.Duration, du func(path string) (DiskUsage, error)) { 262 t := time.NewTicker(period) 263 defer t.Stop() 264 curTime := time.Now() 265 for range t.C { 266 if time.Since(curTime) < minPeriod { // too short 267 continue // wait for next cycle to avoid overwhelming the disk 268 } 269 space, err := du(d.dataPath) 270 if err != nil { 271 d.log.WithField("action", "delegate.local_state.disk_usage").Error(err) 272 } else { 273 d.setOwnSpace(space) 274 } 275 curTime = time.Now() 276 } 277 } 278 279 // events implement memberlist.EventDelegate interface 280 // EventDelegate is a simpler delegate that is used only to receive 281 // notifications about members joining and leaving. The methods in this 282 // delegate may be called by multiple goroutines, but never concurrently. 283 // This allows you to reason about ordering. 284 type events struct { 285 d *delegate 286 } 287 288 // NotifyJoin is invoked when a node is detected to have joined. 289 // The Node argument must not be modified. 290 func (e events) NotifyJoin(*memberlist.Node) {} 291 292 // NotifyLeave is invoked when a node is detected to have left. 293 // The Node argument must not be modified. 294 func (e events) NotifyLeave(node *memberlist.Node) { 295 e.d.delete(node.Name) 296 } 297 298 // NotifyUpdate is invoked when a node is detected to have 299 // updated, usually involving the meta data. The Node argument 300 // must not be modified. 301 func (e events) NotifyUpdate(*memberlist.Node) {}