github.com/projecteru2/core@v0.0.0-20240321043226-06bcc1c23f58/store/redis/node.go (about) 1 package redis 2 3 import ( 4 "context" 5 "encoding/json" 6 "fmt" 7 "path/filepath" 8 "strings" 9 "sync" 10 "time" 11 12 "github.com/projecteru2/core/engine" 13 enginefactory "github.com/projecteru2/core/engine/factory" 14 "github.com/projecteru2/core/engine/fake" 15 "github.com/projecteru2/core/engine/mocks/fakeengine" 16 "github.com/projecteru2/core/log" 17 "github.com/projecteru2/core/store" 18 "github.com/projecteru2/core/types" 19 "github.com/projecteru2/core/utils" 20 21 "github.com/cockroachdb/errors" 22 ) 23 24 // AddNode save it to etcd 25 // storage path in etcd is `/pod/nodes/:podname/:nodename` 26 // node->pod path in etcd is `/node/pod/:nodename` 27 // func (m *Rediaron) AddNode(ctx context.Context, name, endpoint, podname, ca, cert, key string, 28 // cpu, share int, memory, storage int64, labels map[string]string, 29 // numa types.NUMA, numaMemory types.NUMAMemory, volume types.VolumeMap) (*types.Node, error) { 30 func (r *Rediaron) AddNode(ctx context.Context, opts *types.AddNodeOptions) (*types.Node, error) { 31 _, err := r.GetPod(ctx, opts.Podname) 32 if err != nil { 33 return nil, err 34 } 35 36 return r.doAddNode(ctx, opts.Nodename, opts.Endpoint, opts.Podname, opts.Ca, opts.Cert, opts.Key, opts.Labels, opts.Test) 37 } 38 39 // RemoveNode delete a node 40 func (r *Rediaron) RemoveNode(ctx context.Context, node *types.Node) error { 41 if node == nil { 42 return nil 43 } 44 return r.doRemoveNode(ctx, node.Podname, node.Name, node.Endpoint) 45 } 46 47 // GetNode get node by name 48 func (r *Rediaron) GetNode(ctx context.Context, nodename string) (*types.Node, error) { 49 nodes, err := r.GetNodes(ctx, []string{nodename}) 50 if err != nil { 51 return nil, err 52 } 53 return nodes[0], nil 54 } 55 56 // GetNodes get nodes 57 func (r *Rediaron) GetNodes(ctx context.Context, nodenames []string) ([]*types.Node, error) { 58 nodesKeys := []string{} 59 for _, nodename := range nodenames { 60 key := fmt.Sprintf(nodeInfoKey, nodename) 61 nodesKeys = append(nodesKeys, key) 62 } 63 64 kvs, err := r.GetMulti(ctx, nodesKeys) 65 if err != nil { 66 return nil, err 67 } 68 return r.doGetNodes(ctx, kvs, nil, true, nil) 69 } 70 71 // GetNodesByPod get all nodes bound to pod 72 // here we use podname instead of pod instance 73 func (r *Rediaron) GetNodesByPod(ctx context.Context, nodeFilter *types.NodeFilter, opts ...store.Option) ([]*types.Node, error) { 74 op := store.NewOp(opts...) 75 do := func(podname string) ([]*types.Node, error) { 76 key := fmt.Sprintf(nodePodKey, podname, "*") 77 kvs, err := r.getByKeyPattern(ctx, key, 0) 78 if err != nil { 79 return nil, err 80 } 81 return r.doGetNodes(ctx, kvs, nodeFilter.Labels, nodeFilter.All, op) 82 } 83 if nodeFilter.Podname != "" { 84 return do(nodeFilter.Podname) 85 } 86 pods, err := r.GetAllPods(ctx) 87 if err != nil { 88 return nil, err 89 } 90 result := []*types.Node{} 91 for _, pod := range pods { 92 ns, err := do(pod.Name) 93 if err != nil { 94 return nil, err 95 } 96 result = append(result, ns...) 97 } 98 return result, nil 99 } 100 101 // UpdateNodes . 102 func (r *Rediaron) UpdateNodes(ctx context.Context, nodes ...*types.Node) error { 103 data := map[string]string{} 104 addIfNotEmpty := func(key, value string) { 105 if value != "" { 106 data[key] = value 107 } 108 } 109 for _, node := range nodes { 110 bytes, err := json.Marshal(node) 111 if err != nil { 112 return err 113 } 114 d := string(bytes) 115 data[fmt.Sprintf(nodeInfoKey, node.Name)] = d 116 data[fmt.Sprintf(nodePodKey, node.Podname, node.Name)] = d 117 addIfNotEmpty(fmt.Sprintf(nodeCaKey, node.Name), node.Ca) 118 addIfNotEmpty(fmt.Sprintf(nodeCertKey, node.Name), node.Cert) 119 addIfNotEmpty(fmt.Sprintf(nodeKeyKey, node.Name), node.Key) 120 } 121 return r.BatchPut(ctx, data) 122 } 123 124 // SetNodeStatus sets status for a node, value will expire after ttl seconds 125 // ttl < 0 means delete node status 126 // this is heartbeat of node 127 func (r *Rediaron) SetNodeStatus(ctx context.Context, node *types.Node, ttl int64) error { 128 if ttl == 0 { 129 return types.ErrInvaildNodeStatusTTL 130 } 131 132 // nodenames are unique 133 key := filepath.Join(nodeStatusPrefix, node.Name) 134 135 if ttl < 0 { 136 _, err := r.cli.Del(ctx, key).Result() 137 return err 138 } 139 140 data, err := json.Marshal(types.NodeStatus{ 141 Nodename: node.Name, 142 Podname: node.Podname, 143 Alive: true, 144 }) 145 if err != nil { 146 return err 147 } 148 149 _, err = r.cli.Set(ctx, key, string(data), time.Duration(ttl)*time.Second).Result() 150 return err 151 } 152 153 // GetNodeStatus returns status for a node 154 func (r *Rediaron) GetNodeStatus(ctx context.Context, nodename string) (*types.NodeStatus, error) { 155 key := filepath.Join(nodeStatusPrefix, nodename) 156 ev, err := r.GetOne(ctx, key) 157 if err != nil { 158 return nil, err 159 } 160 161 ns := &types.NodeStatus{} 162 if err := json.Unmarshal([]byte(ev), ns); err != nil { 163 return nil, err 164 } 165 return ns, nil 166 } 167 168 // NodeStatusStream returns a stream of node status 169 // it tells you if status of a node is changed, either PUT or DELETE 170 // PUT -> Alive: true 171 // DELETE -> Alive: false 172 func (r *Rediaron) NodeStatusStream(ctx context.Context) chan *types.NodeStatus { 173 ch := make(chan *types.NodeStatus) 174 logger := log.WithFunc("store.redis.NodeStatusStream") 175 _ = r.pool.Invoke(func() { 176 defer func() { 177 logger.Info(ctx, "close NodeStatusStream channel") 178 close(ch) 179 }() 180 181 key := filepath.Join(nodeStatusPrefix, "*") 182 logger.Infof(ctx, "watch on %s", key) 183 for message := range r.KNotify(ctx, key) { 184 nodename := extractNodename(message.Key) 185 status := &types.NodeStatus{ 186 Nodename: nodename, 187 Alive: strings.ToLower(message.Action) != actionExpired, 188 } 189 node, err := r.GetNode(ctx, nodename) 190 if err != nil { 191 status.Error = err 192 } else { 193 status.Podname = node.Podname 194 } 195 ch <- status 196 } 197 }) 198 return ch 199 } 200 201 func (r *Rediaron) LoadNodeCert(ctx context.Context, node *types.Node) (err error) { 202 keyFormats := []string{nodeCaKey, nodeCertKey, nodeKeyKey} 203 data := []string{"", "", ""} 204 for i := 0; i < 3; i++ { 205 v, err := r.GetOne(ctx, fmt.Sprintf(keyFormats[i], node.Name)) 206 if err != nil { 207 if !isRedisNoKeyError(err) { 208 log.WithFunc("store.redis.LoadNodeCert").Warnf(ctx, "Get key failed %+v", err) 209 return err 210 } 211 continue 212 } 213 data[i] = v 214 } 215 node.Ca, node.Cert, node.Key = data[0], data[1], data[2] 216 return nil 217 } 218 219 func (r *Rediaron) makeClient(ctx context.Context, node *types.Node) (client engine.API, err error) { 220 // try to get from cache without ca/cert/key 221 if client = enginefactory.GetEngineFromCache(ctx, node.Endpoint, "", "", ""); client != nil { 222 return client, nil 223 } 224 keyFormats := []string{nodeCaKey, nodeCertKey, nodeKeyKey} 225 data := []string{"", "", ""} 226 for i := 0; i < 3; i++ { 227 v, err := r.GetOne(ctx, fmt.Sprintf(keyFormats[i], node.Name)) 228 if err != nil { 229 if !isRedisNoKeyError(err) { 230 log.WithFunc("store.redis.makeClient").Warnf(ctx, "Get key failed %+v", err) 231 return nil, err 232 } 233 continue 234 } 235 data[i] = v 236 } 237 238 client, err = enginefactory.GetEngine(ctx, r.config, node.Name, node.Endpoint, data[0], data[1], data[2]) 239 if err != nil { 240 return nil, err 241 } 242 return client, nil 243 } 244 245 func (r *Rediaron) doAddNode(ctx context.Context, name, endpoint, podname, ca, cert, key string, labels map[string]string, test bool) (*types.Node, error) { 246 data := map[string]string{} 247 // 如果有tls的证书需要保存就保存一下 248 if ca != "" { 249 data[fmt.Sprintf(nodeCaKey, name)] = ca 250 } 251 if cert != "" { 252 data[fmt.Sprintf(nodeCertKey, name)] = cert 253 } 254 if key != "" { 255 data[fmt.Sprintf(nodeKeyKey, name)] = key 256 } 257 258 node := &types.Node{ 259 NodeMeta: types.NodeMeta{ 260 Name: name, 261 Endpoint: endpoint, 262 Podname: podname, 263 Labels: labels, 264 }, 265 Available: true, 266 Bypass: false, 267 Test: test || strings.HasPrefix(endpoint, fakeengine.PrefixKey), 268 } 269 270 bytes, err := json.Marshal(node) 271 if err != nil { 272 return nil, err 273 } 274 275 d := string(bytes) 276 data[fmt.Sprintf(nodeInfoKey, name)] = d 277 data[fmt.Sprintf(nodePodKey, podname, name)] = d 278 279 err = r.BatchCreate(ctx, data) 280 if err != nil { 281 return nil, err 282 } 283 284 return node, nil 285 } 286 287 // 因为是先写etcd的证书再拿client 288 // 所以可能出现实际上node创建失败但是却写好了证书的情况 289 // 所以需要删除这些留存的证书 290 // 至于结果是不是成功就无所谓了 291 func (r *Rediaron) doRemoveNode(ctx context.Context, podname, nodename, endpoint string) error { 292 keys := []string{ 293 fmt.Sprintf(nodeInfoKey, nodename), 294 fmt.Sprintf(nodePodKey, podname, nodename), 295 fmt.Sprintf(nodeCaKey, nodename), 296 fmt.Sprintf(nodeCertKey, nodename), 297 fmt.Sprintf(nodeKeyKey, nodename), 298 } 299 300 err := r.BatchDelete(ctx, keys) 301 log.WithFunc("store.redis.doRemoveNode").Infof(ctx, "Node (%s, %s, %s) deleted", podname, nodename, endpoint) 302 return err 303 } 304 305 func (r *Rediaron) doGetNodes( 306 ctx context.Context, kvs map[string]string, 307 labels map[string]string, all bool, op *store.Op, 308 ) (nodes []*types.Node, err error) { 309 allNodes := []*types.Node{} 310 for _, value := range kvs { 311 node := &types.Node{} 312 if err := json.Unmarshal([]byte(value), node); err != nil { 313 return nil, err 314 } 315 node.Engine = &fake.EngineWithErr{DefaultErr: types.ErrNilEngine} 316 if utils.LabelsFilter(node.Labels, labels) { 317 allNodes = append(allNodes, node) 318 } 319 } 320 logger := log.WithFunc("store.redis.doGetNodes") 321 322 wg := &sync.WaitGroup{} 323 wg.Add(len(allNodes)) 324 nodeChan := make(chan *types.Node, len(allNodes)) 325 326 for _, node := range allNodes { 327 node := node 328 _ = r.pool.Invoke(func() { 329 defer wg.Done() 330 if node.Test { 331 node.Available = true && !node.Bypass 332 } else if _, err := r.GetNodeStatus(ctx, node.Name); err != nil && !errors.Is(err, types.ErrInvaildCount) { 333 logger.Errorf(ctx, err, "failed to get node status of %+v", node.Name) 334 } else { 335 node.Available = err == nil 336 } 337 338 if !all && node.IsDown() { 339 return 340 } 341 342 nodeChan <- node 343 if op == nil || (!op.WithoutEngine) { 344 if client, err := r.makeClient(ctx, node); err != nil { 345 logger.Errorf(ctx, err, "failed to make client for %+v", node.Name) 346 } else { 347 node.Engine = client 348 } 349 } 350 }) 351 } 352 wg.Wait() 353 close(nodeChan) 354 355 for node := range nodeChan { 356 nodes = append(nodes, node) 357 } 358 359 return nodes, nil 360 }