github.com/projecteru2/core@v0.0.0-20240321043226-06bcc1c23f58/store/redis/node.go (about)

     1  package redis
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"fmt"
     7  	"path/filepath"
     8  	"strings"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/projecteru2/core/engine"
    13  	enginefactory "github.com/projecteru2/core/engine/factory"
    14  	"github.com/projecteru2/core/engine/fake"
    15  	"github.com/projecteru2/core/engine/mocks/fakeengine"
    16  	"github.com/projecteru2/core/log"
    17  	"github.com/projecteru2/core/store"
    18  	"github.com/projecteru2/core/types"
    19  	"github.com/projecteru2/core/utils"
    20  
    21  	"github.com/cockroachdb/errors"
    22  )
    23  
    24  // AddNode save it to etcd
    25  // storage path in etcd is `/pod/nodes/:podname/:nodename`
    26  // node->pod path in etcd is `/node/pod/:nodename`
    27  // func (m *Rediaron) AddNode(ctx context.Context, name, endpoint, podname, ca, cert, key string,
    28  // cpu, share int, memory, storage int64, labels map[string]string,
    29  // numa types.NUMA, numaMemory types.NUMAMemory, volume types.VolumeMap) (*types.Node, error) {
    30  func (r *Rediaron) AddNode(ctx context.Context, opts *types.AddNodeOptions) (*types.Node, error) {
    31  	_, err := r.GetPod(ctx, opts.Podname)
    32  	if err != nil {
    33  		return nil, err
    34  	}
    35  
    36  	return r.doAddNode(ctx, opts.Nodename, opts.Endpoint, opts.Podname, opts.Ca, opts.Cert, opts.Key, opts.Labels, opts.Test)
    37  }
    38  
    39  // RemoveNode delete a node
    40  func (r *Rediaron) RemoveNode(ctx context.Context, node *types.Node) error {
    41  	if node == nil {
    42  		return nil
    43  	}
    44  	return r.doRemoveNode(ctx, node.Podname, node.Name, node.Endpoint)
    45  }
    46  
    47  // GetNode get node by name
    48  func (r *Rediaron) GetNode(ctx context.Context, nodename string) (*types.Node, error) {
    49  	nodes, err := r.GetNodes(ctx, []string{nodename})
    50  	if err != nil {
    51  		return nil, err
    52  	}
    53  	return nodes[0], nil
    54  }
    55  
    56  // GetNodes get nodes
    57  func (r *Rediaron) GetNodes(ctx context.Context, nodenames []string) ([]*types.Node, error) {
    58  	nodesKeys := []string{}
    59  	for _, nodename := range nodenames {
    60  		key := fmt.Sprintf(nodeInfoKey, nodename)
    61  		nodesKeys = append(nodesKeys, key)
    62  	}
    63  
    64  	kvs, err := r.GetMulti(ctx, nodesKeys)
    65  	if err != nil {
    66  		return nil, err
    67  	}
    68  	return r.doGetNodes(ctx, kvs, nil, true, nil)
    69  }
    70  
    71  // GetNodesByPod get all nodes bound to pod
    72  // here we use podname instead of pod instance
    73  func (r *Rediaron) GetNodesByPod(ctx context.Context, nodeFilter *types.NodeFilter, opts ...store.Option) ([]*types.Node, error) {
    74  	op := store.NewOp(opts...)
    75  	do := func(podname string) ([]*types.Node, error) {
    76  		key := fmt.Sprintf(nodePodKey, podname, "*")
    77  		kvs, err := r.getByKeyPattern(ctx, key, 0)
    78  		if err != nil {
    79  			return nil, err
    80  		}
    81  		return r.doGetNodes(ctx, kvs, nodeFilter.Labels, nodeFilter.All, op)
    82  	}
    83  	if nodeFilter.Podname != "" {
    84  		return do(nodeFilter.Podname)
    85  	}
    86  	pods, err := r.GetAllPods(ctx)
    87  	if err != nil {
    88  		return nil, err
    89  	}
    90  	result := []*types.Node{}
    91  	for _, pod := range pods {
    92  		ns, err := do(pod.Name)
    93  		if err != nil {
    94  			return nil, err
    95  		}
    96  		result = append(result, ns...)
    97  	}
    98  	return result, nil
    99  }
   100  
   101  // UpdateNodes .
   102  func (r *Rediaron) UpdateNodes(ctx context.Context, nodes ...*types.Node) error {
   103  	data := map[string]string{}
   104  	addIfNotEmpty := func(key, value string) {
   105  		if value != "" {
   106  			data[key] = value
   107  		}
   108  	}
   109  	for _, node := range nodes {
   110  		bytes, err := json.Marshal(node)
   111  		if err != nil {
   112  			return err
   113  		}
   114  		d := string(bytes)
   115  		data[fmt.Sprintf(nodeInfoKey, node.Name)] = d
   116  		data[fmt.Sprintf(nodePodKey, node.Podname, node.Name)] = d
   117  		addIfNotEmpty(fmt.Sprintf(nodeCaKey, node.Name), node.Ca)
   118  		addIfNotEmpty(fmt.Sprintf(nodeCertKey, node.Name), node.Cert)
   119  		addIfNotEmpty(fmt.Sprintf(nodeKeyKey, node.Name), node.Key)
   120  	}
   121  	return r.BatchPut(ctx, data)
   122  }
   123  
   124  // SetNodeStatus sets status for a node, value will expire after ttl seconds
   125  // ttl < 0 means delete node status
   126  // this is heartbeat of node
   127  func (r *Rediaron) SetNodeStatus(ctx context.Context, node *types.Node, ttl int64) error {
   128  	if ttl == 0 {
   129  		return types.ErrInvaildNodeStatusTTL
   130  	}
   131  
   132  	// nodenames are unique
   133  	key := filepath.Join(nodeStatusPrefix, node.Name)
   134  
   135  	if ttl < 0 {
   136  		_, err := r.cli.Del(ctx, key).Result()
   137  		return err
   138  	}
   139  
   140  	data, err := json.Marshal(types.NodeStatus{
   141  		Nodename: node.Name,
   142  		Podname:  node.Podname,
   143  		Alive:    true,
   144  	})
   145  	if err != nil {
   146  		return err
   147  	}
   148  
   149  	_, err = r.cli.Set(ctx, key, string(data), time.Duration(ttl)*time.Second).Result()
   150  	return err
   151  }
   152  
   153  // GetNodeStatus returns status for a node
   154  func (r *Rediaron) GetNodeStatus(ctx context.Context, nodename string) (*types.NodeStatus, error) {
   155  	key := filepath.Join(nodeStatusPrefix, nodename)
   156  	ev, err := r.GetOne(ctx, key)
   157  	if err != nil {
   158  		return nil, err
   159  	}
   160  
   161  	ns := &types.NodeStatus{}
   162  	if err := json.Unmarshal([]byte(ev), ns); err != nil {
   163  		return nil, err
   164  	}
   165  	return ns, nil
   166  }
   167  
   168  // NodeStatusStream returns a stream of node status
   169  // it tells you if status of a node is changed, either PUT or DELETE
   170  // PUT    -> Alive: true
   171  // DELETE -> Alive: false
   172  func (r *Rediaron) NodeStatusStream(ctx context.Context) chan *types.NodeStatus {
   173  	ch := make(chan *types.NodeStatus)
   174  	logger := log.WithFunc("store.redis.NodeStatusStream")
   175  	_ = r.pool.Invoke(func() {
   176  		defer func() {
   177  			logger.Info(ctx, "close NodeStatusStream channel")
   178  			close(ch)
   179  		}()
   180  
   181  		key := filepath.Join(nodeStatusPrefix, "*")
   182  		logger.Infof(ctx, "watch on %s", key)
   183  		for message := range r.KNotify(ctx, key) {
   184  			nodename := extractNodename(message.Key)
   185  			status := &types.NodeStatus{
   186  				Nodename: nodename,
   187  				Alive:    strings.ToLower(message.Action) != actionExpired,
   188  			}
   189  			node, err := r.GetNode(ctx, nodename)
   190  			if err != nil {
   191  				status.Error = err
   192  			} else {
   193  				status.Podname = node.Podname
   194  			}
   195  			ch <- status
   196  		}
   197  	})
   198  	return ch
   199  }
   200  
   201  func (r *Rediaron) LoadNodeCert(ctx context.Context, node *types.Node) (err error) {
   202  	keyFormats := []string{nodeCaKey, nodeCertKey, nodeKeyKey}
   203  	data := []string{"", "", ""}
   204  	for i := 0; i < 3; i++ {
   205  		v, err := r.GetOne(ctx, fmt.Sprintf(keyFormats[i], node.Name))
   206  		if err != nil {
   207  			if !isRedisNoKeyError(err) {
   208  				log.WithFunc("store.redis.LoadNodeCert").Warnf(ctx, "Get key failed %+v", err)
   209  				return err
   210  			}
   211  			continue
   212  		}
   213  		data[i] = v
   214  	}
   215  	node.Ca, node.Cert, node.Key = data[0], data[1], data[2]
   216  	return nil
   217  }
   218  
   219  func (r *Rediaron) makeClient(ctx context.Context, node *types.Node) (client engine.API, err error) {
   220  	// try to get from cache without ca/cert/key
   221  	if client = enginefactory.GetEngineFromCache(ctx, node.Endpoint, "", "", ""); client != nil {
   222  		return client, nil
   223  	}
   224  	keyFormats := []string{nodeCaKey, nodeCertKey, nodeKeyKey}
   225  	data := []string{"", "", ""}
   226  	for i := 0; i < 3; i++ {
   227  		v, err := r.GetOne(ctx, fmt.Sprintf(keyFormats[i], node.Name))
   228  		if err != nil {
   229  			if !isRedisNoKeyError(err) {
   230  				log.WithFunc("store.redis.makeClient").Warnf(ctx, "Get key failed %+v", err)
   231  				return nil, err
   232  			}
   233  			continue
   234  		}
   235  		data[i] = v
   236  	}
   237  
   238  	client, err = enginefactory.GetEngine(ctx, r.config, node.Name, node.Endpoint, data[0], data[1], data[2])
   239  	if err != nil {
   240  		return nil, err
   241  	}
   242  	return client, nil
   243  }
   244  
   245  func (r *Rediaron) doAddNode(ctx context.Context, name, endpoint, podname, ca, cert, key string, labels map[string]string, test bool) (*types.Node, error) {
   246  	data := map[string]string{}
   247  	// 如果有tls的证书需要保存就保存一下
   248  	if ca != "" {
   249  		data[fmt.Sprintf(nodeCaKey, name)] = ca
   250  	}
   251  	if cert != "" {
   252  		data[fmt.Sprintf(nodeCertKey, name)] = cert
   253  	}
   254  	if key != "" {
   255  		data[fmt.Sprintf(nodeKeyKey, name)] = key
   256  	}
   257  
   258  	node := &types.Node{
   259  		NodeMeta: types.NodeMeta{
   260  			Name:     name,
   261  			Endpoint: endpoint,
   262  			Podname:  podname,
   263  			Labels:   labels,
   264  		},
   265  		Available: true,
   266  		Bypass:    false,
   267  		Test:      test || strings.HasPrefix(endpoint, fakeengine.PrefixKey),
   268  	}
   269  
   270  	bytes, err := json.Marshal(node)
   271  	if err != nil {
   272  		return nil, err
   273  	}
   274  
   275  	d := string(bytes)
   276  	data[fmt.Sprintf(nodeInfoKey, name)] = d
   277  	data[fmt.Sprintf(nodePodKey, podname, name)] = d
   278  
   279  	err = r.BatchCreate(ctx, data)
   280  	if err != nil {
   281  		return nil, err
   282  	}
   283  
   284  	return node, nil
   285  }
   286  
   287  // 因为是先写etcd的证书再拿client
   288  // 所以可能出现实际上node创建失败但是却写好了证书的情况
   289  // 所以需要删除这些留存的证书
   290  // 至于结果是不是成功就无所谓了
   291  func (r *Rediaron) doRemoveNode(ctx context.Context, podname, nodename, endpoint string) error {
   292  	keys := []string{
   293  		fmt.Sprintf(nodeInfoKey, nodename),
   294  		fmt.Sprintf(nodePodKey, podname, nodename),
   295  		fmt.Sprintf(nodeCaKey, nodename),
   296  		fmt.Sprintf(nodeCertKey, nodename),
   297  		fmt.Sprintf(nodeKeyKey, nodename),
   298  	}
   299  
   300  	err := r.BatchDelete(ctx, keys)
   301  	log.WithFunc("store.redis.doRemoveNode").Infof(ctx, "Node (%s, %s, %s) deleted", podname, nodename, endpoint)
   302  	return err
   303  }
   304  
   305  func (r *Rediaron) doGetNodes(
   306  	ctx context.Context, kvs map[string]string,
   307  	labels map[string]string, all bool, op *store.Op,
   308  ) (nodes []*types.Node, err error) {
   309  	allNodes := []*types.Node{}
   310  	for _, value := range kvs {
   311  		node := &types.Node{}
   312  		if err := json.Unmarshal([]byte(value), node); err != nil {
   313  			return nil, err
   314  		}
   315  		node.Engine = &fake.EngineWithErr{DefaultErr: types.ErrNilEngine}
   316  		if utils.LabelsFilter(node.Labels, labels) {
   317  			allNodes = append(allNodes, node)
   318  		}
   319  	}
   320  	logger := log.WithFunc("store.redis.doGetNodes")
   321  
   322  	wg := &sync.WaitGroup{}
   323  	wg.Add(len(allNodes))
   324  	nodeChan := make(chan *types.Node, len(allNodes))
   325  
   326  	for _, node := range allNodes {
   327  		node := node
   328  		_ = r.pool.Invoke(func() {
   329  			defer wg.Done()
   330  			if node.Test {
   331  				node.Available = true && !node.Bypass
   332  			} else if _, err := r.GetNodeStatus(ctx, node.Name); err != nil && !errors.Is(err, types.ErrInvaildCount) {
   333  				logger.Errorf(ctx, err, "failed to get node status of %+v", node.Name)
   334  			} else {
   335  				node.Available = err == nil
   336  			}
   337  
   338  			if !all && node.IsDown() {
   339  				return
   340  			}
   341  
   342  			nodeChan <- node
   343  			if op == nil || (!op.WithoutEngine) {
   344  				if client, err := r.makeClient(ctx, node); err != nil {
   345  					logger.Errorf(ctx, err, "failed to make client for %+v", node.Name)
   346  				} else {
   347  					node.Engine = client
   348  				}
   349  			}
   350  		})
   351  	}
   352  	wg.Wait()
   353  	close(nodeChan)
   354  
   355  	for node := range nodeChan {
   356  		nodes = append(nodes, node)
   357  	}
   358  
   359  	return nodes, nil
   360  }