github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/api/cluster.go (about)

     1  // Package api provides native Go-based API/SDK over HTTP(S).
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package api
     6  
     7  import (
     8  	"fmt"
     9  	"net/http"
    10  	"net/url"
    11  
    12  	"github.com/NVIDIA/aistore/api/apc"
    13  	"github.com/NVIDIA/aistore/cmn"
    14  	"github.com/NVIDIA/aistore/cmn/cos"
    15  	"github.com/NVIDIA/aistore/core/meta"
    16  )
    17  
    18  // to be used by external watchdogs (Kubernetes, etc.)
    19  // (compare with api.Health below)
    20  func GetProxyReadiness(bp BaseParams) error {
    21  	bp.Method = http.MethodGet
    22  	q := url.Values{apc.QparamHealthReadiness: []string{"true"}}
    23  	reqParams := AllocRp()
    24  	{
    25  		reqParams.BaseParams = bp
    26  		reqParams.Path = apc.URLPathHealth.S
    27  		reqParams.Query = q
    28  	}
    29  	err := reqParams.DoRequest()
    30  	FreeRp(reqParams)
    31  	return err
    32  }
    33  
    34  func Health(bp BaseParams, readyToRebalance ...bool) error {
    35  	reqParams := mkhealth(bp, readyToRebalance...)
    36  	err := reqParams.DoRequest()
    37  	FreeRp(reqParams)
    38  	return err
    39  }
    40  
    41  func HealthUptime(bp BaseParams, readyToRebalance ...bool) (string, string, error) {
    42  	reqParams := mkhealth(bp, readyToRebalance...)
    43  	hdr, _, err := reqParams.doReqHdr()
    44  	if err != nil {
    45  		return "", "", err
    46  	}
    47  	clutime, nutime := hdr.Get(apc.HdrClusterUptime), hdr.Get(apc.HdrNodeUptime)
    48  	FreeRp(reqParams)
    49  	return clutime, nutime, err
    50  }
    51  
    52  func mkhealth(bp BaseParams, readyToRebalance ...bool) (reqParams *ReqParams) {
    53  	var q url.Values
    54  	bp.Method = http.MethodGet
    55  	if len(readyToRebalance) > 0 && readyToRebalance[0] {
    56  		q = url.Values{apc.QparamPrimaryReadyReb: []string{"true"}}
    57  	}
    58  	reqParams = AllocRp()
    59  	{
    60  		reqParams.BaseParams = bp
    61  		reqParams.Path = apc.URLPathHealth.S
    62  		reqParams.Query = q
    63  	}
    64  	return
    65  }
    66  
    67  // get cluster map from a BaseParams-referenced node
    68  func GetClusterMap(bp BaseParams) (smap *meta.Smap, err error) {
    69  	bp.Method = http.MethodGet
    70  	reqParams := AllocRp()
    71  	{
    72  		reqParams.BaseParams = bp
    73  		reqParams.Path = apc.URLPathDae.S
    74  		reqParams.Query = url.Values{apc.QparamWhat: []string{apc.WhatSmap}}
    75  	}
    76  	_, err = reqParams.DoReqAny(&smap)
    77  	FreeRp(reqParams)
    78  	return smap, err
    79  }
    80  
    81  // GetNodeClusterMap retrieves cluster map from the specified node.
    82  func GetNodeClusterMap(bp BaseParams, sid string) (smap *meta.Smap, err error) {
    83  	bp.Method = http.MethodGet
    84  	reqParams := AllocRp()
    85  	{
    86  		reqParams.BaseParams = bp
    87  		reqParams.Path = apc.URLPathReverseDae.S
    88  		reqParams.Query = url.Values{apc.QparamWhat: []string{apc.WhatSmap}}
    89  		reqParams.Header = http.Header{apc.HdrNodeID: []string{sid}}
    90  	}
    91  	_, err = reqParams.DoReqAny(&smap)
    92  	FreeRp(reqParams)
    93  	return
    94  }
    95  
    96  // get bucket metadata (BMD) from a BaseParams-referenced node
    97  func GetBMD(bp BaseParams) (bmd *meta.BMD, err error) {
    98  	bp.Method = http.MethodGet
    99  	reqParams := AllocRp()
   100  	{
   101  		reqParams.BaseParams = bp
   102  		reqParams.Path = apc.URLPathDae.S
   103  		reqParams.Query = url.Values{apc.QparamWhat: []string{apc.WhatBMD}}
   104  	}
   105  
   106  	bmd = &meta.BMD{}
   107  	_, err = reqParams.DoReqAny(bmd)
   108  	FreeRp(reqParams)
   109  	return bmd, err
   110  }
   111  
   112  // - get (smap, bmd, config) *cluster-level* metadata from the spec-ed node
   113  // - compare with GetClusterMap, GetNodeClusterMap, GetClusterConfig et al.
   114  // - TODO: etl meta
   115  func GetNodeMeta(bp BaseParams, sid, what string) (out any, err error) {
   116  	bp.Method = http.MethodGet
   117  	reqParams := AllocRp()
   118  	{
   119  		reqParams.BaseParams = bp
   120  		reqParams.Path = apc.URLPathReverseDae.S
   121  		reqParams.Query = url.Values{apc.QparamWhat: []string{what}}
   122  		reqParams.Header = http.Header{apc.HdrNodeID: []string{sid}}
   123  	}
   124  	switch what {
   125  	case apc.WhatSmap:
   126  		smap := meta.Smap{}
   127  		_, err = reqParams.DoReqAny(&smap)
   128  		out = &smap
   129  	case apc.WhatBMD:
   130  		bmd := meta.BMD{}
   131  		_, err = reqParams.DoReqAny(&bmd)
   132  		out = &bmd
   133  	case apc.WhatClusterConfig:
   134  		config := cmn.ClusterConfig{}
   135  		_, err = reqParams.DoReqAny(&config)
   136  		out = &config
   137  	default:
   138  		err = fmt.Errorf("unknown or unsupported cluster-level metadata type %q", what)
   139  		return
   140  	}
   141  	FreeRp(reqParams)
   142  	return
   143  }
   144  
   145  // GetClusterSysInfo retrieves cluster's system information
   146  func GetClusterSysInfo(bp BaseParams) (info apc.ClusterSysInfo, err error) {
   147  	bp.Method = http.MethodGet
   148  	reqParams := AllocRp()
   149  	{
   150  		reqParams.BaseParams = bp
   151  		reqParams.Path = apc.URLPathClu.S
   152  		reqParams.Query = url.Values{apc.QparamWhat: []string{apc.WhatSysInfo}}
   153  	}
   154  	_, err = reqParams.DoReqAny(&info)
   155  	FreeRp(reqParams)
   156  	return
   157  }
   158  
   159  func GetRemoteAIS(bp BaseParams) (remais meta.RemAisVec, err error) {
   160  	bp.Method = http.MethodGet
   161  	reqParams := AllocRp()
   162  	{
   163  		reqParams.BaseParams = bp
   164  		reqParams.Path = apc.URLPathClu.S
   165  		reqParams.Query = url.Values{apc.QparamWhat: []string{apc.WhatRemoteAIS}}
   166  	}
   167  	_, err = reqParams.DoReqAny(&remais)
   168  	FreeRp(reqParams)
   169  	return
   170  }
   171  
   172  // JoinCluster add a node to a cluster.
   173  func JoinCluster(bp BaseParams, nodeInfo *meta.Snode) (rebID, sid string, err error) {
   174  	bp.Method = http.MethodPost
   175  	reqParams := AllocRp()
   176  	{
   177  		reqParams.BaseParams = bp
   178  		reqParams.Path = apc.URLPathCluUserReg.S
   179  		reqParams.Body = cos.MustMarshal(nodeInfo)
   180  		reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}}
   181  	}
   182  
   183  	var info apc.JoinNodeResult
   184  	_, err = reqParams.DoReqAny(&info)
   185  	FreeRp(reqParams)
   186  	return info.RebalanceID, info.DaemonID, err
   187  }
   188  
   189  // SetPrimaryProxy given a daemonID sets that corresponding proxy as the
   190  // primary proxy of the cluster.
   191  func SetPrimaryProxy(bp BaseParams, newPrimaryID string, force bool) error {
   192  	bp.Method = http.MethodPut
   193  	reqParams := AllocRp()
   194  	reqParams.BaseParams = bp
   195  	reqParams.Path = apc.URLPathCluProxy.Join(newPrimaryID)
   196  	if force {
   197  		reqParams.Query = url.Values{apc.QparamForce: []string{"true"}}
   198  	}
   199  	err := reqParams.DoRequest()
   200  	FreeRp(reqParams)
   201  	return err
   202  }
   203  
   204  // SetClusterConfig given key-value pairs of cluster configuration parameters,
   205  // sets the cluster-wide configuration accordingly. Setting cluster-wide
   206  // configuration requires sending the request to a proxy.
   207  func SetClusterConfig(bp BaseParams, nvs cos.StrKVs, transient bool) error {
   208  	q := make(url.Values, len(nvs))
   209  	for key, val := range nvs {
   210  		q.Set(key, val)
   211  	}
   212  	if transient {
   213  		q.Set(apc.ActTransient, "true")
   214  	}
   215  	bp.Method = http.MethodPut
   216  	reqParams := AllocRp()
   217  	{
   218  		reqParams.BaseParams = bp
   219  		reqParams.Path = apc.URLPathCluSetConf.S
   220  		reqParams.Query = q
   221  	}
   222  	err := reqParams.DoRequest()
   223  	FreeRp(reqParams)
   224  	return err
   225  }
   226  
   227  // SetClusterConfigUsingMsg sets the cluster-wide configuration
   228  // using the `cmn.ConfigToSet` parameter provided.
   229  func SetClusterConfigUsingMsg(bp BaseParams, configToUpdate *cmn.ConfigToSet, transient bool) error {
   230  	var (
   231  		q   url.Values
   232  		msg = apc.ActMsg{Action: apc.ActSetConfig, Value: configToUpdate}
   233  	)
   234  	if transient {
   235  		q.Set(apc.ActTransient, "true")
   236  	}
   237  	bp.Method = http.MethodPut
   238  	reqParams := AllocRp()
   239  	{
   240  		reqParams.BaseParams = bp
   241  		reqParams.Path = apc.URLPathClu.S
   242  		reqParams.Body = cos.MustMarshal(msg)
   243  		reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}}
   244  		reqParams.Query = q
   245  	}
   246  	err := reqParams.DoRequest()
   247  	FreeRp(reqParams)
   248  	return err
   249  }
   250  
   251  // all nodes: reset configuration to cluster defaults
   252  func ResetClusterConfig(bp BaseParams) error {
   253  	return _putCluster(bp, apc.ActMsg{Action: apc.ActResetConfig})
   254  }
   255  
   256  func RotateClusterLogs(bp BaseParams) error {
   257  	return _putCluster(bp, apc.ActMsg{Action: apc.ActRotateLogs})
   258  }
   259  
   260  func _putCluster(bp BaseParams, msg apc.ActMsg) error {
   261  	bp.Method = http.MethodPut
   262  	reqParams := AllocRp()
   263  	{
   264  		reqParams.BaseParams = bp
   265  		reqParams.Path = apc.URLPathClu.S
   266  		reqParams.Body = cos.MustMarshal(msg)
   267  		reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}}
   268  	}
   269  	err := reqParams.DoRequest()
   270  	FreeRp(reqParams)
   271  	return err
   272  }
   273  
   274  // GetClusterConfig returns cluster-wide configuration
   275  // (compare with `api.GetDaemonConfig`)
   276  func GetClusterConfig(bp BaseParams) (*cmn.ClusterConfig, error) {
   277  	bp.Method = http.MethodGet
   278  	reqParams := AllocRp()
   279  	{
   280  		reqParams.BaseParams = bp
   281  		reqParams.Path = apc.URLPathClu.S
   282  		reqParams.Query = url.Values{apc.QparamWhat: []string{apc.WhatClusterConfig}}
   283  	}
   284  
   285  	cluConfig := &cmn.ClusterConfig{}
   286  	_, err := reqParams.DoReqAny(cluConfig)
   287  	FreeRp(reqParams)
   288  	if err != nil {
   289  		return nil, err
   290  	}
   291  	return cluConfig, nil
   292  }
   293  
   294  func AttachRemoteAIS(bp BaseParams, alias, u string) error {
   295  	bp.Method = http.MethodPut
   296  	reqParams := AllocRp()
   297  	{
   298  		reqParams.BaseParams = bp
   299  		reqParams.Path = apc.URLPathCluAttach.S
   300  		reqParams.Query = url.Values{apc.QparamWhat: []string{apc.WhatRemoteAIS}}
   301  		reqParams.Header = http.Header{
   302  			apc.HdrRemAisAlias: []string{alias},
   303  			apc.HdrRemAisURL:   []string{u},
   304  		}
   305  	}
   306  	return reqParams.DoRequest()
   307  }
   308  
   309  func DetachRemoteAIS(bp BaseParams, alias string) error {
   310  	bp.Method = http.MethodPut
   311  	reqParams := AllocRp()
   312  	{
   313  		reqParams.BaseParams = bp
   314  		reqParams.Path = apc.URLPathCluDetach.S
   315  		reqParams.Query = url.Values{apc.QparamWhat: []string{apc.WhatRemoteAIS}}
   316  		reqParams.Header = http.Header{apc.HdrRemAisAlias: []string{alias}}
   317  	}
   318  	err := reqParams.DoRequest()
   319  	FreeRp(reqParams)
   320  	return err
   321  }
   322  
   323  //
   324  // Maintenance API
   325  //
   326  
   327  func StartMaintenance(bp BaseParams, actValue *apc.ActValRmNode) (xid string, err error) {
   328  	msg := apc.ActMsg{
   329  		Action: apc.ActStartMaintenance,
   330  		Value:  actValue,
   331  	}
   332  	bp.Method = http.MethodPut
   333  	reqParams := AllocRp()
   334  	{
   335  		reqParams.BaseParams = bp
   336  		reqParams.Path = apc.URLPathClu.S
   337  		reqParams.Body = cos.MustMarshal(msg)
   338  		reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}}
   339  	}
   340  	_, err = reqParams.doReqStr(&xid)
   341  	FreeRp(reqParams)
   342  	return xid, err
   343  }
   344  
   345  func DecommissionNode(bp BaseParams, actValue *apc.ActValRmNode) (xid string, err error) {
   346  	msg := apc.ActMsg{
   347  		Action: apc.ActDecommissionNode,
   348  		Value:  actValue,
   349  	}
   350  	bp.Method = http.MethodPut
   351  	reqParams := AllocRp()
   352  	{
   353  		reqParams.BaseParams = bp
   354  		reqParams.Path = apc.URLPathClu.S
   355  		reqParams.Body = cos.MustMarshal(msg)
   356  		reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}}
   357  	}
   358  	_, err = reqParams.doReqStr(&xid)
   359  	FreeRp(reqParams)
   360  	return xid, err
   361  }
   362  
   363  func StopMaintenance(bp BaseParams, actValue *apc.ActValRmNode) (xid string, err error) {
   364  	msg := apc.ActMsg{
   365  		Action: apc.ActStopMaintenance,
   366  		Value:  actValue,
   367  	}
   368  	bp.Method = http.MethodPut
   369  	reqParams := AllocRp()
   370  	{
   371  		reqParams.BaseParams = bp
   372  		reqParams.Path = apc.URLPathClu.S
   373  		reqParams.Body = cos.MustMarshal(msg)
   374  		reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}}
   375  	}
   376  	_, err = reqParams.doReqStr(&xid)
   377  	FreeRp(reqParams)
   378  	return xid, err
   379  }
   380  
   381  // ShutdownCluster shuts down the whole cluster
   382  func ShutdownCluster(bp BaseParams) error {
   383  	msg := apc.ActMsg{Action: apc.ActShutdownCluster}
   384  	bp.Method = http.MethodPut
   385  	reqParams := AllocRp()
   386  	{
   387  		reqParams.BaseParams = bp
   388  		reqParams.Path = apc.URLPathClu.S
   389  		reqParams.Body = cos.MustMarshal(msg)
   390  		reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}}
   391  	}
   392  	err := reqParams.DoRequest()
   393  	FreeRp(reqParams)
   394  	return err
   395  }
   396  
   397  // DecommissionCluster permanently decommissions entire cluster
   398  func DecommissionCluster(bp BaseParams, rmUserData bool) error {
   399  	msg := apc.ActMsg{Action: apc.ActDecommissionCluster}
   400  	if rmUserData {
   401  		msg.Value = &apc.ActValRmNode{RmUserData: true}
   402  	}
   403  	bp.Method = http.MethodPut
   404  	reqParams := AllocRp()
   405  	{
   406  		reqParams.BaseParams = bp
   407  		reqParams.Path = apc.URLPathClu.S
   408  		reqParams.Body = cos.MustMarshal(msg)
   409  		reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}}
   410  	}
   411  	err := reqParams.DoRequest()
   412  	FreeRp(reqParams)
   413  	if cos.IsEOF(err) {
   414  		err = nil
   415  	}
   416  	return err
   417  }
   418  
   419  // ShutdownNode shuts down a specific node
   420  func ShutdownNode(bp BaseParams, actValue *apc.ActValRmNode) (id string, err error) {
   421  	msg := apc.ActMsg{
   422  		Action: apc.ActShutdownNode,
   423  		Value:  actValue,
   424  	}
   425  	bp.Method = http.MethodPut
   426  	reqParams := AllocRp()
   427  	{
   428  		reqParams.BaseParams = bp
   429  		reqParams.Path = apc.URLPathClu.S
   430  		reqParams.Body = cos.MustMarshal(msg)
   431  		reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}}
   432  	}
   433  	_, err = reqParams.doReqStr(&id)
   434  	FreeRp(reqParams)
   435  	return id, err
   436  }
   437  
   438  // Remove node node from the cluster immediately.
   439  // - NOTE: potential data loss, advanced usage only!
   440  // - NOTE: the node remains running (compare w/ shutdown) and can be re-joined at a later time
   441  // (see api.JoinCluster).
   442  func RemoveNodeUnsafe(bp BaseParams, sid string) error {
   443  	msg := apc.ActMsg{
   444  		Action: apc.ActRmNodeUnsafe,
   445  		Value:  &apc.ActValRmNode{DaemonID: sid, SkipRebalance: true},
   446  	}
   447  	bp.Method = http.MethodPut
   448  	reqParams := AllocRp()
   449  	{
   450  		reqParams.BaseParams = bp
   451  		reqParams.Path = apc.URLPathClu.S
   452  		reqParams.Body = cos.MustMarshal(msg)
   453  		reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}}
   454  	}
   455  	err := reqParams.DoRequest()
   456  	FreeRp(reqParams)
   457  	return err
   458  }