github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/api/cluster.go (about) 1 // Package api provides native Go-based API/SDK over HTTP(S). 2 /* 3 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package api 6 7 import ( 8 "fmt" 9 "net/http" 10 "net/url" 11 12 "github.com/NVIDIA/aistore/api/apc" 13 "github.com/NVIDIA/aistore/cmn" 14 "github.com/NVIDIA/aistore/cmn/cos" 15 "github.com/NVIDIA/aistore/core/meta" 16 ) 17 18 // to be used by external watchdogs (Kubernetes, etc.) 19 // (compare with api.Health below) 20 func GetProxyReadiness(bp BaseParams) error { 21 bp.Method = http.MethodGet 22 q := url.Values{apc.QparamHealthReadiness: []string{"true"}} 23 reqParams := AllocRp() 24 { 25 reqParams.BaseParams = bp 26 reqParams.Path = apc.URLPathHealth.S 27 reqParams.Query = q 28 } 29 err := reqParams.DoRequest() 30 FreeRp(reqParams) 31 return err 32 } 33 34 func Health(bp BaseParams, readyToRebalance ...bool) error { 35 reqParams := mkhealth(bp, readyToRebalance...) 36 err := reqParams.DoRequest() 37 FreeRp(reqParams) 38 return err 39 } 40 41 func HealthUptime(bp BaseParams, readyToRebalance ...bool) (string, string, error) { 42 reqParams := mkhealth(bp, readyToRebalance...) 43 hdr, _, err := reqParams.doReqHdr() 44 if err != nil { 45 return "", "", err 46 } 47 clutime, nutime := hdr.Get(apc.HdrClusterUptime), hdr.Get(apc.HdrNodeUptime) 48 FreeRp(reqParams) 49 return clutime, nutime, err 50 } 51 52 func mkhealth(bp BaseParams, readyToRebalance ...bool) (reqParams *ReqParams) { 53 var q url.Values 54 bp.Method = http.MethodGet 55 if len(readyToRebalance) > 0 && readyToRebalance[0] { 56 q = url.Values{apc.QparamPrimaryReadyReb: []string{"true"}} 57 } 58 reqParams = AllocRp() 59 { 60 reqParams.BaseParams = bp 61 reqParams.Path = apc.URLPathHealth.S 62 reqParams.Query = q 63 } 64 return 65 } 66 67 // get cluster map from a BaseParams-referenced node 68 func GetClusterMap(bp BaseParams) (smap *meta.Smap, err error) { 69 bp.Method = http.MethodGet 70 reqParams := AllocRp() 71 { 72 reqParams.BaseParams = bp 73 reqParams.Path = apc.URLPathDae.S 74 reqParams.Query = url.Values{apc.QparamWhat: []string{apc.WhatSmap}} 75 } 76 _, err = reqParams.DoReqAny(&smap) 77 FreeRp(reqParams) 78 return smap, err 79 } 80 81 // GetNodeClusterMap retrieves cluster map from the specified node. 82 func GetNodeClusterMap(bp BaseParams, sid string) (smap *meta.Smap, err error) { 83 bp.Method = http.MethodGet 84 reqParams := AllocRp() 85 { 86 reqParams.BaseParams = bp 87 reqParams.Path = apc.URLPathReverseDae.S 88 reqParams.Query = url.Values{apc.QparamWhat: []string{apc.WhatSmap}} 89 reqParams.Header = http.Header{apc.HdrNodeID: []string{sid}} 90 } 91 _, err = reqParams.DoReqAny(&smap) 92 FreeRp(reqParams) 93 return 94 } 95 96 // get bucket metadata (BMD) from a BaseParams-referenced node 97 func GetBMD(bp BaseParams) (bmd *meta.BMD, err error) { 98 bp.Method = http.MethodGet 99 reqParams := AllocRp() 100 { 101 reqParams.BaseParams = bp 102 reqParams.Path = apc.URLPathDae.S 103 reqParams.Query = url.Values{apc.QparamWhat: []string{apc.WhatBMD}} 104 } 105 106 bmd = &meta.BMD{} 107 _, err = reqParams.DoReqAny(bmd) 108 FreeRp(reqParams) 109 return bmd, err 110 } 111 112 // - get (smap, bmd, config) *cluster-level* metadata from the spec-ed node 113 // - compare with GetClusterMap, GetNodeClusterMap, GetClusterConfig et al. 114 // - TODO: etl meta 115 func GetNodeMeta(bp BaseParams, sid, what string) (out any, err error) { 116 bp.Method = http.MethodGet 117 reqParams := AllocRp() 118 { 119 reqParams.BaseParams = bp 120 reqParams.Path = apc.URLPathReverseDae.S 121 reqParams.Query = url.Values{apc.QparamWhat: []string{what}} 122 reqParams.Header = http.Header{apc.HdrNodeID: []string{sid}} 123 } 124 switch what { 125 case apc.WhatSmap: 126 smap := meta.Smap{} 127 _, err = reqParams.DoReqAny(&smap) 128 out = &smap 129 case apc.WhatBMD: 130 bmd := meta.BMD{} 131 _, err = reqParams.DoReqAny(&bmd) 132 out = &bmd 133 case apc.WhatClusterConfig: 134 config := cmn.ClusterConfig{} 135 _, err = reqParams.DoReqAny(&config) 136 out = &config 137 default: 138 err = fmt.Errorf("unknown or unsupported cluster-level metadata type %q", what) 139 return 140 } 141 FreeRp(reqParams) 142 return 143 } 144 145 // GetClusterSysInfo retrieves cluster's system information 146 func GetClusterSysInfo(bp BaseParams) (info apc.ClusterSysInfo, err error) { 147 bp.Method = http.MethodGet 148 reqParams := AllocRp() 149 { 150 reqParams.BaseParams = bp 151 reqParams.Path = apc.URLPathClu.S 152 reqParams.Query = url.Values{apc.QparamWhat: []string{apc.WhatSysInfo}} 153 } 154 _, err = reqParams.DoReqAny(&info) 155 FreeRp(reqParams) 156 return 157 } 158 159 func GetRemoteAIS(bp BaseParams) (remais meta.RemAisVec, err error) { 160 bp.Method = http.MethodGet 161 reqParams := AllocRp() 162 { 163 reqParams.BaseParams = bp 164 reqParams.Path = apc.URLPathClu.S 165 reqParams.Query = url.Values{apc.QparamWhat: []string{apc.WhatRemoteAIS}} 166 } 167 _, err = reqParams.DoReqAny(&remais) 168 FreeRp(reqParams) 169 return 170 } 171 172 // JoinCluster add a node to a cluster. 173 func JoinCluster(bp BaseParams, nodeInfo *meta.Snode) (rebID, sid string, err error) { 174 bp.Method = http.MethodPost 175 reqParams := AllocRp() 176 { 177 reqParams.BaseParams = bp 178 reqParams.Path = apc.URLPathCluUserReg.S 179 reqParams.Body = cos.MustMarshal(nodeInfo) 180 reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}} 181 } 182 183 var info apc.JoinNodeResult 184 _, err = reqParams.DoReqAny(&info) 185 FreeRp(reqParams) 186 return info.RebalanceID, info.DaemonID, err 187 } 188 189 // SetPrimaryProxy given a daemonID sets that corresponding proxy as the 190 // primary proxy of the cluster. 191 func SetPrimaryProxy(bp BaseParams, newPrimaryID string, force bool) error { 192 bp.Method = http.MethodPut 193 reqParams := AllocRp() 194 reqParams.BaseParams = bp 195 reqParams.Path = apc.URLPathCluProxy.Join(newPrimaryID) 196 if force { 197 reqParams.Query = url.Values{apc.QparamForce: []string{"true"}} 198 } 199 err := reqParams.DoRequest() 200 FreeRp(reqParams) 201 return err 202 } 203 204 // SetClusterConfig given key-value pairs of cluster configuration parameters, 205 // sets the cluster-wide configuration accordingly. Setting cluster-wide 206 // configuration requires sending the request to a proxy. 207 func SetClusterConfig(bp BaseParams, nvs cos.StrKVs, transient bool) error { 208 q := make(url.Values, len(nvs)) 209 for key, val := range nvs { 210 q.Set(key, val) 211 } 212 if transient { 213 q.Set(apc.ActTransient, "true") 214 } 215 bp.Method = http.MethodPut 216 reqParams := AllocRp() 217 { 218 reqParams.BaseParams = bp 219 reqParams.Path = apc.URLPathCluSetConf.S 220 reqParams.Query = q 221 } 222 err := reqParams.DoRequest() 223 FreeRp(reqParams) 224 return err 225 } 226 227 // SetClusterConfigUsingMsg sets the cluster-wide configuration 228 // using the `cmn.ConfigToSet` parameter provided. 229 func SetClusterConfigUsingMsg(bp BaseParams, configToUpdate *cmn.ConfigToSet, transient bool) error { 230 var ( 231 q url.Values 232 msg = apc.ActMsg{Action: apc.ActSetConfig, Value: configToUpdate} 233 ) 234 if transient { 235 q.Set(apc.ActTransient, "true") 236 } 237 bp.Method = http.MethodPut 238 reqParams := AllocRp() 239 { 240 reqParams.BaseParams = bp 241 reqParams.Path = apc.URLPathClu.S 242 reqParams.Body = cos.MustMarshal(msg) 243 reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}} 244 reqParams.Query = q 245 } 246 err := reqParams.DoRequest() 247 FreeRp(reqParams) 248 return err 249 } 250 251 // all nodes: reset configuration to cluster defaults 252 func ResetClusterConfig(bp BaseParams) error { 253 return _putCluster(bp, apc.ActMsg{Action: apc.ActResetConfig}) 254 } 255 256 func RotateClusterLogs(bp BaseParams) error { 257 return _putCluster(bp, apc.ActMsg{Action: apc.ActRotateLogs}) 258 } 259 260 func _putCluster(bp BaseParams, msg apc.ActMsg) error { 261 bp.Method = http.MethodPut 262 reqParams := AllocRp() 263 { 264 reqParams.BaseParams = bp 265 reqParams.Path = apc.URLPathClu.S 266 reqParams.Body = cos.MustMarshal(msg) 267 reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}} 268 } 269 err := reqParams.DoRequest() 270 FreeRp(reqParams) 271 return err 272 } 273 274 // GetClusterConfig returns cluster-wide configuration 275 // (compare with `api.GetDaemonConfig`) 276 func GetClusterConfig(bp BaseParams) (*cmn.ClusterConfig, error) { 277 bp.Method = http.MethodGet 278 reqParams := AllocRp() 279 { 280 reqParams.BaseParams = bp 281 reqParams.Path = apc.URLPathClu.S 282 reqParams.Query = url.Values{apc.QparamWhat: []string{apc.WhatClusterConfig}} 283 } 284 285 cluConfig := &cmn.ClusterConfig{} 286 _, err := reqParams.DoReqAny(cluConfig) 287 FreeRp(reqParams) 288 if err != nil { 289 return nil, err 290 } 291 return cluConfig, nil 292 } 293 294 func AttachRemoteAIS(bp BaseParams, alias, u string) error { 295 bp.Method = http.MethodPut 296 reqParams := AllocRp() 297 { 298 reqParams.BaseParams = bp 299 reqParams.Path = apc.URLPathCluAttach.S 300 reqParams.Query = url.Values{apc.QparamWhat: []string{apc.WhatRemoteAIS}} 301 reqParams.Header = http.Header{ 302 apc.HdrRemAisAlias: []string{alias}, 303 apc.HdrRemAisURL: []string{u}, 304 } 305 } 306 return reqParams.DoRequest() 307 } 308 309 func DetachRemoteAIS(bp BaseParams, alias string) error { 310 bp.Method = http.MethodPut 311 reqParams := AllocRp() 312 { 313 reqParams.BaseParams = bp 314 reqParams.Path = apc.URLPathCluDetach.S 315 reqParams.Query = url.Values{apc.QparamWhat: []string{apc.WhatRemoteAIS}} 316 reqParams.Header = http.Header{apc.HdrRemAisAlias: []string{alias}} 317 } 318 err := reqParams.DoRequest() 319 FreeRp(reqParams) 320 return err 321 } 322 323 // 324 // Maintenance API 325 // 326 327 func StartMaintenance(bp BaseParams, actValue *apc.ActValRmNode) (xid string, err error) { 328 msg := apc.ActMsg{ 329 Action: apc.ActStartMaintenance, 330 Value: actValue, 331 } 332 bp.Method = http.MethodPut 333 reqParams := AllocRp() 334 { 335 reqParams.BaseParams = bp 336 reqParams.Path = apc.URLPathClu.S 337 reqParams.Body = cos.MustMarshal(msg) 338 reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}} 339 } 340 _, err = reqParams.doReqStr(&xid) 341 FreeRp(reqParams) 342 return xid, err 343 } 344 345 func DecommissionNode(bp BaseParams, actValue *apc.ActValRmNode) (xid string, err error) { 346 msg := apc.ActMsg{ 347 Action: apc.ActDecommissionNode, 348 Value: actValue, 349 } 350 bp.Method = http.MethodPut 351 reqParams := AllocRp() 352 { 353 reqParams.BaseParams = bp 354 reqParams.Path = apc.URLPathClu.S 355 reqParams.Body = cos.MustMarshal(msg) 356 reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}} 357 } 358 _, err = reqParams.doReqStr(&xid) 359 FreeRp(reqParams) 360 return xid, err 361 } 362 363 func StopMaintenance(bp BaseParams, actValue *apc.ActValRmNode) (xid string, err error) { 364 msg := apc.ActMsg{ 365 Action: apc.ActStopMaintenance, 366 Value: actValue, 367 } 368 bp.Method = http.MethodPut 369 reqParams := AllocRp() 370 { 371 reqParams.BaseParams = bp 372 reqParams.Path = apc.URLPathClu.S 373 reqParams.Body = cos.MustMarshal(msg) 374 reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}} 375 } 376 _, err = reqParams.doReqStr(&xid) 377 FreeRp(reqParams) 378 return xid, err 379 } 380 381 // ShutdownCluster shuts down the whole cluster 382 func ShutdownCluster(bp BaseParams) error { 383 msg := apc.ActMsg{Action: apc.ActShutdownCluster} 384 bp.Method = http.MethodPut 385 reqParams := AllocRp() 386 { 387 reqParams.BaseParams = bp 388 reqParams.Path = apc.URLPathClu.S 389 reqParams.Body = cos.MustMarshal(msg) 390 reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}} 391 } 392 err := reqParams.DoRequest() 393 FreeRp(reqParams) 394 return err 395 } 396 397 // DecommissionCluster permanently decommissions entire cluster 398 func DecommissionCluster(bp BaseParams, rmUserData bool) error { 399 msg := apc.ActMsg{Action: apc.ActDecommissionCluster} 400 if rmUserData { 401 msg.Value = &apc.ActValRmNode{RmUserData: true} 402 } 403 bp.Method = http.MethodPut 404 reqParams := AllocRp() 405 { 406 reqParams.BaseParams = bp 407 reqParams.Path = apc.URLPathClu.S 408 reqParams.Body = cos.MustMarshal(msg) 409 reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}} 410 } 411 err := reqParams.DoRequest() 412 FreeRp(reqParams) 413 if cos.IsEOF(err) { 414 err = nil 415 } 416 return err 417 } 418 419 // ShutdownNode shuts down a specific node 420 func ShutdownNode(bp BaseParams, actValue *apc.ActValRmNode) (id string, err error) { 421 msg := apc.ActMsg{ 422 Action: apc.ActShutdownNode, 423 Value: actValue, 424 } 425 bp.Method = http.MethodPut 426 reqParams := AllocRp() 427 { 428 reqParams.BaseParams = bp 429 reqParams.Path = apc.URLPathClu.S 430 reqParams.Body = cos.MustMarshal(msg) 431 reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}} 432 } 433 _, err = reqParams.doReqStr(&id) 434 FreeRp(reqParams) 435 return id, err 436 } 437 438 // Remove node node from the cluster immediately. 439 // - NOTE: potential data loss, advanced usage only! 440 // - NOTE: the node remains running (compare w/ shutdown) and can be re-joined at a later time 441 // (see api.JoinCluster). 442 func RemoveNodeUnsafe(bp BaseParams, sid string) error { 443 msg := apc.ActMsg{ 444 Action: apc.ActRmNodeUnsafe, 445 Value: &apc.ActValRmNode{DaemonID: sid, SkipRebalance: true}, 446 } 447 bp.Method = http.MethodPut 448 reqParams := AllocRp() 449 { 450 reqParams.BaseParams = bp 451 reqParams.Path = apc.URLPathClu.S 452 reqParams.Body = cos.MustMarshal(msg) 453 reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}} 454 } 455 err := reqParams.DoRequest() 456 FreeRp(reqParams) 457 return err 458 }