github.com/weaviate/weaviate@v1.24.6/usecases/cluster/delegate.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package cluster
    13  
    14  import (
    15  	"bytes"
    16  	"encoding/binary"
    17  	"fmt"
    18  	"math/rand"
    19  	"sort"
    20  	"sync"
    21  	"time"
    22  
    23  	enterrors "github.com/weaviate/weaviate/entities/errors"
    24  
    25  	"github.com/hashicorp/memberlist"
    26  	"github.com/sirupsen/logrus"
    27  )
    28  
// _OpCode represents the type of supported operation.
// It is stored as a single byte in the message header.
type _OpCode uint8
    31  
const (
	// _ProtoVersion is the internal protocol version for exchanging messages.
	// It is written into the header of every message built by LocalState.
	_ProtoVersion uint8 = 1
	// _OpCodeDisk is the operation code for messages reporting disk space.
	_OpCodeDisk _OpCode = 1
	// _ProtoTTL is used to decide when to refresh the node's own disk usage:
	// init passes it to the updater as the ticker period.
	_ProtoTTL = time.Second * 8
)
    40  
// spaceMsg is used to notify other nodes about current disk usage.
//
// marshal writes the fields in declaration order: header, DiskUsage,
// NodeLen and finally the raw bytes of Node. The field order must not be
// changed because the struct is also built with positional literals.
type spaceMsg struct {
	header
	DiskUsage
	NodeLen uint8  // = len(Node) is required to marshal Node
	Node    string // name of the node the disk usage belongs to
}
    48  
// header of an operation. It is encoded with encoding/binary, so it takes
// two bytes on the wire: OpCode first, then ProtoVersion.
type header struct {
	// OpCode is the operation code of the message.
	OpCode _OpCode
	// ProtoVersion is the protocol version the sender speaks.
	ProtoVersion uint8
}
    56  
// DiskUsage contains total and available disk space in bytes.
// It is encoded with encoding/binary as two consecutive uint64 values.
type DiskUsage struct {
	// Total disk space
	Total uint64
	// Total available space
	Available uint64
}
    64  
// NodeInfo holds the disk space of a node together with the time at which
// that value was recorded.
type NodeInfo struct {
	DiskUsage
	LastTimeMilli int64 // last update time in milliseconds (Unix epoch)
}
    70  
    71  func (d *spaceMsg) marshal() (data []byte, err error) {
    72  	buf := bytes.NewBuffer(make([]byte, 0, 24+len(d.Node)))
    73  	if err := binary.Write(buf, binary.BigEndian, d.header); err != nil {
    74  		return nil, err
    75  	}
    76  	if err := binary.Write(buf, binary.BigEndian, d.DiskUsage); err != nil {
    77  		return nil, err
    78  	}
    79  	// code node name starting by its length
    80  	if err := buf.WriteByte(d.NodeLen); err != nil {
    81  		return nil, err
    82  	}
    83  	_, err = buf.Write([]byte(d.Node))
    84  	return buf.Bytes(), err
    85  }
    86  
    87  func (d *spaceMsg) unmarshal(data []byte) (err error) {
    88  	rd := bytes.NewReader(data)
    89  	if err = binary.Read(rd, binary.BigEndian, &d.header); err != nil {
    90  		return
    91  	}
    92  	if err = binary.Read(rd, binary.BigEndian, &d.DiskUsage); err != nil {
    93  		return
    94  	}
    95  
    96  	// decode node name start by its length
    97  	if d.NodeLen, err = rd.ReadByte(); err != nil {
    98  		return
    99  	}
   100  	begin := len(data) - rd.Len()
   101  	end := begin + int(d.NodeLen)
   102  	// make sure this version is backward compatible
   103  	if _ProtoVersion <= 1 && begin+int(d.NodeLen) != len(data) {
   104  		begin-- // since previous version doesn't encode the length
   105  		end = len(data)
   106  		d.NodeLen = uint8(end - begin)
   107  	}
   108  	d.Node = string(data[begin:end])
   109  	return nil
   110  }
   111  
// delegate implements the memberlist delegate interface.
//
// It keeps two independently locked pieces of state: Cache, guarded by the
// embedded mutex, and hostInfo, guarded by the separate mutex field.
type delegate struct {
	Name     string             // name of the local node, used as its key in Cache
	dataPath string             // path handed to the disk usage function
	log      logrus.FieldLogger // logger used to report errors
	sync.Mutex                  // guards Cache
	Cache map[string]NodeInfo   // disk usage per node name; created by init

	mutex    sync.Mutex // guards hostInfo
	hostInfo NodeInfo   // most recently measured disk usage of the local node
}
   123  
   124  func (d *delegate) setOwnSpace(x DiskUsage) {
   125  	d.mutex.Lock()
   126  	d.hostInfo = NodeInfo{DiskUsage: x, LastTimeMilli: time.Now().UnixMilli()}
   127  	d.mutex.Unlock()
   128  }
   129  
   130  func (d *delegate) ownInfo() NodeInfo {
   131  	d.mutex.Lock()
   132  	defer d.mutex.Unlock()
   133  	return d.hostInfo
   134  }
   135  
   136  // init must be called first to initialize the cache
   137  func (d *delegate) init(diskSpace func(path string) (DiskUsage, error)) error {
   138  	d.Cache = make(map[string]NodeInfo, 32)
   139  	if diskSpace == nil {
   140  		return fmt.Errorf("function calculating disk space cannot be empty")
   141  	}
   142  	lastTime := time.Now()
   143  	minUpdatePeriod := time.Second + _ProtoTTL/3
   144  	space, err := diskSpace(d.dataPath)
   145  	if err != nil {
   146  		lastTime = lastTime.Add(-minUpdatePeriod)
   147  		d.log.Errorf("calculate disk space: %v", err)
   148  	}
   149  
   150  	d.setOwnSpace(space)
   151  	d.set(d.Name, NodeInfo{space, lastTime.UnixMilli()}) // cache
   152  
   153  	// delegate remains alive throughout the entire program.
   154  	enterrors.GoWrapper(func() { d.updater(_ProtoTTL, minUpdatePeriod, diskSpace) }, d.log)
   155  	return nil
   156  }
   157  
// NodeMeta is used to retrieve meta-data about the current node
// when broadcasting an alive message. Its length is limited to
// the given byte size. This metadata is available in the Node structure.
//
// This implementation publishes no meta-data and always returns nil.
func (d *delegate) NodeMeta(limit int) (meta []byte) {
	return nil
}
   164  
   165  // LocalState is used for a TCP Push/Pull. This is sent to
   166  // the remote side in addition to the membership information. Any
   167  // data can be sent here. See MergeRemoteState as well. The `join`
   168  // boolean indicates this is for a join instead of a push/pull.
   169  func (d *delegate) LocalState(join bool) []byte {
   170  	var (
   171  		info = d.ownInfo()
   172  		err  error
   173  	)
   174  
   175  	d.set(d.Name, info) // cache new value
   176  
   177  	x := spaceMsg{
   178  		header{
   179  			OpCode:       _OpCodeDisk,
   180  			ProtoVersion: _ProtoVersion,
   181  		},
   182  		info.DiskUsage,
   183  		uint8(len(d.Name)),
   184  		d.Name,
   185  	}
   186  	bytes, err := x.marshal()
   187  	if err != nil {
   188  		d.log.WithField("action", "delegate.local_state.marshal").Error(err)
   189  		return nil
   190  	}
   191  	return bytes
   192  }
   193  
   194  // MergeRemoteState is invoked after a TCP Push/Pull. This is the
   195  // state received from the remote side and is the result of the
   196  // remote side's LocalState call. The 'join'
   197  // boolean indicates this is for a join instead of a push/pull.
   198  func (d *delegate) MergeRemoteState(data []byte, join bool) {
   199  	// Does operation match _OpCodeDisk
   200  	if _OpCode(data[0]) != _OpCodeDisk {
   201  		return
   202  	}
   203  	var x spaceMsg
   204  	if err := x.unmarshal(data); err != nil || x.Node == "" {
   205  		d.log.WithField("action", "delegate.merge_remote.unmarshal").
   206  			WithField("data", string(data)).Error(err)
   207  		return
   208  	}
   209  	info := NodeInfo{x.DiskUsage, time.Now().UnixMilli()}
   210  	d.set(x.Node, info)
   211  }
   212  
// NotifyMsg is part of the memberlist delegate interface. User messages are
// not used by this delegate, so incoming data is ignored.
func (d *delegate) NotifyMsg(data []byte) {}
   214  
// GetBroadcasts is part of the memberlist delegate interface. This delegate
// has nothing to broadcast and always returns nil.
func (d *delegate) GetBroadcasts(overhead, limit int) [][]byte {
	return nil
}
   218  
   219  // get returns info about about a specific node in the cluster
   220  func (d *delegate) get(node string) (NodeInfo, bool) {
   221  	d.Lock()
   222  	defer d.Unlock()
   223  	x, ok := d.Cache[node]
   224  	return x, ok
   225  }
   226  
   227  func (d *delegate) set(node string, x NodeInfo) {
   228  	d.Lock()
   229  	defer d.Unlock()
   230  	d.Cache[node] = x
   231  }
   232  
   233  // delete key from the cache
   234  func (d *delegate) delete(node string) {
   235  	d.Lock()
   236  	defer d.Unlock()
   237  	delete(d.Cache, node)
   238  }
   239  
   240  // sortCandidates by the amount of free space in descending order
   241  //
   242  // Two nodes are considered equivalent if the difference between their
   243  // free spaces is less than 32MB.
   244  // The free space is just an rough estimate of the actual amount.
   245  // The Lower bound 32MB helps to mitigate the risk of selecting same set of nodes
   246  // when selections happens concurrently on different initiator nodes.
   247  func (d *delegate) sortCandidates(names []string) []string {
   248  	rand.Shuffle(len(names), func(i, j int) { names[i], names[j] = names[j], names[i] })
   249  
   250  	d.Lock()
   251  	defer d.Unlock()
   252  	m := d.Cache
   253  	sort.Slice(names, func(i, j int) bool {
   254  		return (m[names[j]].Available >> 25) < (m[names[i]].Available >> 25)
   255  	})
   256  
   257  	return names
   258  }
   259  
   260  // updater a function which updates node information periodically
   261  func (d *delegate) updater(period, minPeriod time.Duration, du func(path string) (DiskUsage, error)) {
   262  	t := time.NewTicker(period)
   263  	defer t.Stop()
   264  	curTime := time.Now()
   265  	for range t.C {
   266  		if time.Since(curTime) < minPeriod { // too short
   267  			continue // wait for next cycle to avoid overwhelming the disk
   268  		}
   269  		space, err := du(d.dataPath)
   270  		if err != nil {
   271  			d.log.WithField("action", "delegate.local_state.disk_usage").Error(err)
   272  		} else {
   273  			d.setOwnSpace(space)
   274  		}
   275  		curTime = time.Now()
   276  	}
   277  }
   278  
// events implements the memberlist.EventDelegate interface.
// EventDelegate is a simpler delegate that is used only to receive
// notifications about members joining and leaving. The methods in this
// delegate may be called by multiple goroutines, but never concurrently.
// This allows you to reason about ordering.
type events struct {
	d *delegate // delegate whose cache is updated when a node leaves
}
   287  
// NotifyJoin is invoked when a node is detected to have joined.
// The Node argument must not be modified.
// This implementation takes no action on join.
func (e events) NotifyJoin(*memberlist.Node) {}
   291  
   292  // NotifyLeave is invoked when a node is detected to have left.
   293  // The Node argument must not be modified.
   294  func (e events) NotifyLeave(node *memberlist.Node) {
   295  	e.d.delete(node.Name)
   296  }
   297  
// NotifyUpdate is invoked when a node is detected to have
// updated, usually involving the meta data. The Node argument
// must not be modified.
// This implementation takes no action on update.
func (e events) NotifyUpdate(*memberlist.Node) {}