github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/ic.go (about)

     1  // Package ais provides core functionality for the AIStore object storage.
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package ais
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"net/http"
    12  	"net/url"
    13  	"strconv"
    14  	"time"
    15  
    16  	"github.com/NVIDIA/aistore/api/apc"
    17  	"github.com/NVIDIA/aistore/cmn"
    18  	"github.com/NVIDIA/aistore/cmn/cos"
    19  	"github.com/NVIDIA/aistore/cmn/debug"
    20  	"github.com/NVIDIA/aistore/cmn/nlog"
    21  	"github.com/NVIDIA/aistore/core/meta"
    22  	"github.com/NVIDIA/aistore/nl"
    23  	"github.com/NVIDIA/aistore/xact"
    24  	jsoniter "github.com/json-iterator/go"
    25  )
    26  
// Information Center (IC) is a group of proxies that take care of ownership of
// (Job, Task, eXtended action) entities. IC manages their lifecycle and monitors
// their status. When a job, task, or xaction is created, it gets registered with
// all IC members. Henceforth, IC acts as the source of information about its
// status (running, aborted, finished), progress, and statistics.
//
// Non-IC AIS proxies, on the other hand, redirect all corresponding requests to
// one (any one) of the IC (proxy) members.
    35  
const (
	// equalIC is the special "owner" value that stands for all IC members at once.
	//
	// Implies equal ownership by all IC members and applies to all async ops
	// that have no associated cache other than start/end timestamps and stats counters
	// (case in point: list/query-objects that MAY be cached, etc.)
	equalIC = "\x00"
)
    42  
type (
	// regIC carries the arguments of an IC registration (see registerEqual).
	regIC struct {
		nl    nl.Listener // listener to add to the notifs table and broadcast to the other IC members
		smap  *smapX      // cluster map used to check IC membership and IC size
		query url.Values  // when not nil, gets apc.QparamNotifyMe set to equalIC
		msg   any         // NOTE(review): not referenced by the code in this file - confirm usage
	}

	// xactRegMsg is the value of the apc.ActRegGlobalXaction control message
	// (see handlePost); it is used to build the xaction's notification listener.
	xactRegMsg struct {
		UUID string   `json:"uuid"`
		Kind string   `json:"kind"`
		Srcs []string `json:"srcs"` // list of daemonIDs
	}

	// icBundle is the response to `GET /v1/ic?what=apc.WhatICBundle` (see handleGet):
	// the responder's cluster map together with its marshaled notifs (ownership) table.
	icBundle struct {
		Smap         *smapX              `json:"smap"`
		OwnershipTbl jsoniter.RawMessage `json:"ownership_table"`
	}

	// ic groups the Information Center logic; it operates on behalf of the proxy `p`.
	ic struct {
		p *proxy
	}
)
    66  
    67  func (ic *ic) init(p *proxy) {
    68  	ic.p = p
    69  }
    70  
    71  func (ic *ic) reverseToOwner(w http.ResponseWriter, r *http.Request, uuid string, msg any) (reversedOrFailed bool) {
    72  	retry := true
    73  begin:
    74  	var (
    75  		smap          = ic.p.owner.smap.get()
    76  		selfIC        = smap.IsIC(ic.p.si)
    77  		owner, exists = ic.p.notifs.getOwner(uuid)
    78  		psi           *meta.Snode
    79  	)
    80  	if exists {
    81  		goto outer
    82  	}
    83  	if selfIC {
    84  		if !exists && !retry {
    85  			err := fmt.Errorf("x-[%s] not found (%s)", uuid, smap.StrIC(ic.p.si))
    86  			ic.p.writeErr(w, r, err, http.StatusNotFound, Silent)
    87  			return true
    88  		}
    89  		if retry {
    90  			withRetry(cmn.Rom.CplaneOperation(), func() bool {
    91  				owner, exists = ic.p.notifs.getOwner(uuid)
    92  				return exists
    93  			})
    94  			if !exists {
    95  				retry = false
    96  				_ = ic.syncICBundle() // TODO handle error
    97  				goto begin
    98  			}
    99  		}
   100  	} else {
   101  		hrwOwner, err := smap.HrwIC(uuid)
   102  		if err != nil {
   103  			ic.p.writeErr(w, r, err, http.StatusInternalServerError)
   104  			return true
   105  		}
   106  		owner = hrwOwner.ID()
   107  	}
   108  outer:
   109  	switch owner {
   110  	case "": // not owned
   111  		return
   112  	case equalIC:
   113  		if selfIC {
   114  			owner = ic.p.SID()
   115  		} else {
   116  			for pid, si := range smap.Pmap {
   117  				if !smap.IsIC(psi) {
   118  					continue
   119  				}
   120  				owner = pid
   121  				psi = si
   122  				break outer
   123  			}
   124  		}
   125  	default: // cached + owned
   126  		psi = smap.GetProxy(owner)
   127  		if psi == nil || !smap.IsIC(psi) {
   128  			var err error
   129  			if psi, err = smap.HrwIC(uuid); err != nil {
   130  				ic.p.writeErr(w, r, err, http.StatusInternalServerError)
   131  				return true
   132  			}
   133  		}
   134  		debug.Assertf(smap.IsIC(psi), "%s, %s", psi, smap.StrIC(ic.p.si))
   135  	}
   136  	if owner == ic.p.SID() {
   137  		return
   138  	}
   139  	// otherwise, hand it over
   140  	if msg != nil {
   141  		body := cos.MustMarshal(msg)
   142  		r.ContentLength = int64(len(body))
   143  		r.Body = io.NopCloser(bytes.NewReader(body))
   144  	}
   145  	ic.p.reverseNodeRequest(w, r, psi)
   146  	return true
   147  }
   148  
   149  func (ic *ic) redirectToIC(w http.ResponseWriter, r *http.Request) bool {
   150  	smap := ic.p.owner.smap.get()
   151  	if smap.IsIC(ic.p.si) {
   152  		return false
   153  	}
   154  
   155  	var node *meta.Snode
   156  	for _, psi := range smap.Pmap {
   157  		if smap.IsIC(psi) {
   158  			node = psi
   159  			break
   160  		}
   161  	}
   162  	redirectURL := ic.p.redirectURL(r, node, time.Now(), cmn.NetIntraControl)
   163  	http.Redirect(w, r, redirectURL, http.StatusTemporaryRedirect)
   164  	return true
   165  }
   166  
   167  func (ic *ic) xstatusAll(w http.ResponseWriter, r *http.Request, query url.Values) {
   168  	msg := &xact.QueryMsg{}
   169  	if err := cmn.ReadJSON(w, r, msg); err != nil {
   170  		return
   171  	}
   172  	flt := nlFilter{ID: msg.ID, Kind: msg.Kind, Bck: (*meta.Bck)(&msg.Bck), OnlyRunning: msg.OnlyRunning}
   173  	if !msg.Bck.IsEmpty() {
   174  		flt.Bck = (*meta.Bck)(&msg.Bck)
   175  	}
   176  
   177  	var (
   178  		vec nl.StatusVec
   179  		nls = ic.p.notifs.findAll(flt)
   180  	)
   181  	if cos.IsParseBool(query.Get(apc.QparamForce)) {
   182  		// (force just-in-time)
   183  		// for each args-selected xaction:
   184  		// check if any of the targets delayed updating the corresponding status,
   185  		// and query those targets directly
   186  		var (
   187  			config   = cmn.GCO.Get()
   188  			interval = config.Periodic.NotifTime.D()
   189  		)
   190  		for _, nl := range nls {
   191  			ic.p.notifs.bcastGetStats(nl, interval)
   192  			status := nl.Status()
   193  			if err := nl.Err(); err != nil {
   194  				status.ErrMsg = err.Error()
   195  			}
   196  			vec = append(vec, *status)
   197  		}
   198  	} else {
   199  		for _, nl := range nls {
   200  			vec = append(vec, *nl.Status())
   201  		}
   202  	}
   203  	b := cos.MustMarshal(vec)
   204  	w.Header().Set(cos.HdrContentLength, strconv.Itoa(len(b)))
   205  	w.Write(b)
   206  }
   207  
   208  func (ic *ic) xstatusOne(w http.ResponseWriter, r *http.Request) {
   209  	var (
   210  		nl  nl.Listener
   211  		bck *meta.Bck
   212  		msg = &xact.QueryMsg{}
   213  	)
   214  	if err := cmn.ReadJSON(w, r, msg); err != nil {
   215  		return
   216  	}
   217  	msg.Kind, _ = xact.GetKindName(msg.Kind) // display name => kind
   218  	if msg.ID == "" && msg.Kind == "" {
   219  		ic.p.writeErrStatusf(w, r, http.StatusBadRequest, "invalid %s", msg)
   220  		return
   221  	}
   222  
   223  	// for queries of the type {Kind: apc.ActRebalance}
   224  	if msg.ID == "" && ic.redirectToIC(w, r) {
   225  		return
   226  	}
   227  	if msg.ID != "" && ic.reverseToOwner(w, r, msg.ID, msg) {
   228  		return
   229  	}
   230  
   231  	if msg.Bck.Name != "" {
   232  		bck = meta.CloneBck(&msg.Bck)
   233  		if err := bck.Init(ic.p.owner.bmd); err != nil {
   234  			ic.p.writeErr(w, r, err, http.StatusNotFound, Silent)
   235  			return
   236  		}
   237  	}
   238  	flt := nlFilter{ID: msg.ID, Kind: msg.Kind, Bck: bck, OnlyRunning: msg.OnlyRunning}
   239  	withRetry(cmn.Rom.CplaneOperation(), func() bool {
   240  		nl = ic.p.notifs.find(flt)
   241  		return nl != nil
   242  	})
   243  	if nl == nil {
   244  		smap := ic.p.owner.smap.get()
   245  		err := fmt.Errorf("nl not found: %s, %s", smap.StrIC(ic.p.si), msg)
   246  		ic.p.writeErr(w, r, err, http.StatusNotFound, Silent)
   247  		return
   248  	}
   249  
   250  	if msg.Kind != "" && nl.Kind() != msg.Kind {
   251  		ic.p.writeErrf(w, r, "kind mismatch: %s, expected kind=%s", msg, nl.Kind())
   252  		return
   253  	}
   254  
   255  	// refresh NotifStatus
   256  	var (
   257  		config   = cmn.GCO.Get()
   258  		interval = config.Periodic.NotifTime.D()
   259  	)
   260  	ic.p.notifs.bcastGetStats(nl, interval)
   261  
   262  	status := nl.Status()
   263  	if err := nl.Err(); err != nil {
   264  		status.ErrMsg = err.Error()
   265  	}
   266  	b := cos.MustMarshal(status) // TODO: include stats, e.g., progress when ready
   267  	w.Header().Set(cos.HdrContentLength, strconv.Itoa(len(b)))
   268  	w.Write(b)
   269  }
   270  
   271  // verb /v1/ic
   272  func (ic *ic) handler(w http.ResponseWriter, r *http.Request) {
   273  	switch r.Method {
   274  	case http.MethodGet:
   275  		ic.handleGet(w, r)
   276  	case http.MethodPost:
   277  		ic.handlePost(w, r)
   278  	default:
   279  		debug.Assert(false)
   280  	}
   281  }
   282  
   283  // GET /v1/ic
   284  func (ic *ic) handleGet(w http.ResponseWriter, r *http.Request) {
   285  	var (
   286  		smap = ic.p.owner.smap.get()
   287  		what = r.URL.Query().Get(apc.QparamWhat)
   288  	)
   289  	if !smap.IsIC(ic.p.si) {
   290  		ic.p.writeErrf(w, r, "%s: not an IC member", ic.p.si)
   291  		return
   292  	}
   293  
   294  	switch what {
   295  	case apc.WhatICBundle:
   296  		bundle := icBundle{Smap: smap, OwnershipTbl: cos.MustMarshal(&ic.p.notifs)}
   297  		ic.p.writeJSON(w, r, bundle, what)
   298  	default:
   299  		ic.p.writeErrf(w, r, fmtUnknownQue, what)
   300  	}
   301  }
   302  
   303  // POST /v1/ic
   304  func (ic *ic) handlePost(w http.ResponseWriter, r *http.Request) {
   305  	var (
   306  		smap = ic.p.owner.smap.get()
   307  		msg  = &aisMsg{}
   308  	)
   309  	if err := cmn.ReadJSON(w, r, msg); err != nil {
   310  		return
   311  	}
   312  	if !smap.IsIC(ic.p.si) {
   313  		if !withRetry(cmn.Rom.CplaneOperation(), func() bool {
   314  			smap = ic.p.owner.smap.get()
   315  			return smap.IsIC(ic.p.si)
   316  		}) {
   317  			ic.p.writeErrf(w, r, "%s: not an IC member", ic.p.si)
   318  			return
   319  		}
   320  	}
   321  
   322  	switch msg.Action {
   323  	case apc.ActMergeOwnershipTbl:
   324  		if err := cos.MorphMarshal(msg.Value, &ic.p.notifs); err != nil {
   325  			ic.p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, ic.p.si, msg.Action, msg.Value, err)
   326  			return
   327  		}
   328  	case apc.ActListenToNotif:
   329  		nlMsg := &notifListenMsg{}
   330  		if err := cos.MorphMarshal(msg.Value, nlMsg); err != nil {
   331  			ic.p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, ic.p.si, msg.Action, msg.Value, err)
   332  			return
   333  		}
   334  		if err := ic.p.notifs.add(nlMsg.nl); err != nil {
   335  			ic.p.writeErr(w, r, err)
   336  			return
   337  		}
   338  	case apc.ActRegGlobalXaction:
   339  		var (
   340  			regMsg     = &xactRegMsg{}
   341  			tmap       meta.NodeMap
   342  			callerSver = r.Header.Get(apc.HdrCallerSmapVer)
   343  			err        error
   344  		)
   345  		if err = cos.MorphMarshal(msg.Value, regMsg); err != nil {
   346  			ic.p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, ic.p.si, msg.Action, msg.Value, err)
   347  			return
   348  		}
   349  		debug.Assert(len(regMsg.Srcs) != 0)
   350  		withRetry(cmn.Rom.CplaneOperation(), func() bool {
   351  			smap = ic.p.owner.smap.get()
   352  			tmap, err = smap.NewTmap(regMsg.Srcs)
   353  			return err == nil && callerSver == smap.vstr
   354  		})
   355  		if err != nil {
   356  			ic.p.writeErrStatusf(w, r, http.StatusNotFound, "%s: failed to %q: %v", ic.p, msg.Action, err)
   357  			return
   358  		}
   359  		nl := xact.NewXactNL(regMsg.UUID, regMsg.Kind, &smap.Smap, tmap)
   360  		if err = ic.p.notifs.add(nl); err != nil {
   361  			ic.p.writeErr(w, r, err)
   362  			return
   363  		}
   364  	default:
   365  		ic.p.writeErrAct(w, r, msg.Action)
   366  	}
   367  }
   368  
   369  func (ic *ic) registerEqual(a regIC) {
   370  	if a.query != nil {
   371  		a.query.Set(apc.QparamNotifyMe, equalIC)
   372  	}
   373  	if a.smap.IsIC(ic.p.si) {
   374  		err := ic.p.notifs.add(a.nl)
   375  		debug.AssertNoErr(err)
   376  	}
   377  	if a.smap.ICCount() > 1 {
   378  		ic.bcastListenIC(a.nl)
   379  	}
   380  }
   381  
   382  func (ic *ic) bcastListenIC(nl nl.Listener) {
   383  	var (
   384  		actMsg = apc.ActMsg{Action: apc.ActListenToNotif, Value: newNLMsg(nl)}
   385  		msg    = ic.p.newAmsg(&actMsg, nil)
   386  	)
   387  	ic.p.bcastAsyncIC(msg)
   388  }
   389  
   390  func (ic *ic) sendOwnershipTbl(si *meta.Snode, smap *smapX) error {
   391  	if ic.p.notifs.size() == 0 {
   392  		if cmn.Rom.FastV(4, cos.SmoduleAIS) {
   393  			nlog.Infof("%s: notifs empty, not sending to %s", ic.p, si)
   394  		}
   395  		return nil
   396  	}
   397  	msg := ic.p.newAmsgActVal(apc.ActMergeOwnershipTbl, &ic.p.notifs)
   398  	cargs := allocCargs()
   399  	{
   400  		cargs.si = si
   401  		cargs.req = cmn.HreqArgs{Method: http.MethodPost, Path: apc.URLPathIC.S, Body: cos.MustMarshal(msg)}
   402  		cargs.timeout = cmn.Rom.CplaneOperation()
   403  	}
   404  	res := ic.p.call(cargs, smap)
   405  	freeCargs(cargs)
   406  	return res.err
   407  }
   408  
   409  // sync ownership table; TODO: review control flows and revisit impl.
   410  func (ic *ic) syncICBundle() error {
   411  	smap := ic.p.owner.smap.get()
   412  	si := ic.p.si
   413  	for _, psi := range smap.Pmap {
   414  		if smap.IsIC(psi) && psi.ID() != si.ID() {
   415  			si = psi
   416  			break
   417  		}
   418  	}
   419  
   420  	if si.Eq(ic.p.si) {
   421  		return nil
   422  	}
   423  	cargs := allocCargs()
   424  	{
   425  		cargs.si = si
   426  		cargs.req = cmn.HreqArgs{
   427  			Method: http.MethodGet,
   428  			Path:   apc.URLPathIC.S,
   429  			Query:  url.Values{apc.QparamWhat: []string{apc.WhatICBundle}},
   430  		}
   431  		cargs.timeout = cmn.Rom.CplaneOperation()
   432  		cargs.cresv = cresIC{} // -> icBundle
   433  	}
   434  	res := ic.p.call(cargs, smap)
   435  	freeCargs(cargs)
   436  	if res.err != nil {
   437  		return res.err
   438  	}
   439  
   440  	bundle := res.v.(*icBundle)
   441  	debug.Assertf(smap.UUID == bundle.Smap.UUID, "%s vs %s", smap.StringEx(), bundle.Smap.StringEx())
   442  
   443  	if err := ic.p.owner.smap.synchronize(ic.p.si, bundle.Smap, nil /*ms payload*/, ic.p.htrun.smapUpdatedCB); err != nil {
   444  		if !isErrDowngrade(err) {
   445  			nlog.Errorln(cmn.NewErrFailedTo(ic.p, "sync", bundle.Smap, err))
   446  		}
   447  	} else {
   448  		smap = ic.p.owner.smap.get()
   449  		nlog.Infof("%s: synch %s", ic.p, smap)
   450  	}
   451  
   452  	if !smap.IsIC(ic.p.si) {
   453  		return nil
   454  	}
   455  	if err := jsoniter.Unmarshal(bundle.OwnershipTbl, &ic.p.notifs); err != nil {
   456  		return fmt.Errorf(cmn.FmtErrUnmarshal, ic.p, "ownership table", cos.BHead(bundle.OwnershipTbl), err)
   457  	}
   458  	return nil
   459  }