github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/api/xaction.go (about) 1 // Package api provides native Go-based API/SDK over HTTP(S). 2 /* 3 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package api 6 7 import ( 8 "fmt" 9 "net/http" 10 "net/url" 11 "time" 12 13 "github.com/NVIDIA/aistore/api/apc" 14 "github.com/NVIDIA/aistore/cmn" 15 "github.com/NVIDIA/aistore/cmn/cos" 16 "github.com/NVIDIA/aistore/cmn/debug" 17 "github.com/NVIDIA/aistore/cmn/mono" 18 "github.com/NVIDIA/aistore/nl" 19 "github.com/NVIDIA/aistore/xact" 20 ) 21 22 // Start xaction 23 func StartXaction(bp BaseParams, args *xact.ArgsMsg, extra string) (xid string, err error) { 24 if !xact.Table[args.Kind].Startable { 25 return "", fmt.Errorf("xaction %q is not startable", args.Kind) 26 } 27 q := args.Bck.NewQuery() 28 if args.Force { 29 q.Set(apc.QparamForce, "true") 30 } 31 msg := apc.ActMsg{Action: apc.ActXactStart, Value: args, Name: extra} 32 bp.Method = http.MethodPut 33 reqParams := AllocRp() 34 { 35 reqParams.BaseParams = bp 36 reqParams.Path = apc.URLPathClu.S 37 reqParams.Body = cos.MustMarshal(msg) 38 reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}} 39 reqParams.Query = q 40 } 41 _, err = reqParams.doReqStr(&xid) 42 FreeRp(reqParams) 43 return 44 } 45 46 // Abort ("stop") xactions 47 func AbortXaction(bp BaseParams, args *xact.ArgsMsg) (err error) { 48 msg := apc.ActMsg{Action: apc.ActXactStop, Value: args} 49 bp.Method = http.MethodPut 50 reqParams := AllocRp() 51 { 52 reqParams.BaseParams = bp 53 reqParams.Path = apc.URLPathClu.S 54 reqParams.Body = cos.MustMarshal(msg) 55 reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}} 56 reqParams.Query = args.Bck.NewQuery() 57 } 58 err = reqParams.DoRequest() 59 FreeRp(reqParams) 60 return 61 } 62 63 // 64 // querying and waiting 65 // 66 67 // returns a slice of canonical xaction names, as in: `xact.Cname()` 68 // e.g.: put-copies[D-ViE6HEL_j] list[H96Y7bhR2s] copy-bck[matRQMRes] put-copies[pOibtHExY] 69 // TODO: return idle xactions separately 70 func GetAllRunningXactions(bp BaseParams, kindOrName string) (out []string, err error) { 71 msg := xact.QueryMsg{Kind: kindOrName} 72 bp.Method = http.MethodGet 73 reqParams := AllocRp() 74 { 75 reqParams.BaseParams = bp 76 reqParams.Path = apc.URLPathClu.S 77 reqParams.Body = cos.MustMarshal(msg) 78 reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}} 79 reqParams.Query = url.Values{apc.QparamWhat: []string{apc.WhatAllRunningXacts}} 80 } 81 _, err = reqParams.DoReqAny(&out) 82 FreeRp(reqParams) 83 return 84 } 85 86 // QueryXactionSnaps gets all xaction snaps based on the specified selection. 87 // NOTE: args.Kind can be either xaction kind or name - here and elsewhere 88 func QueryXactionSnaps(bp BaseParams, args *xact.ArgsMsg) (xs xact.MultiSnap, err error) { 89 msg := xact.QueryMsg{ID: args.ID, Kind: args.Kind, Bck: args.Bck} 90 if args.OnlyRunning { 91 msg.OnlyRunning = apc.Ptr(true) 92 } 93 bp.Method = http.MethodGet 94 reqParams := AllocRp() 95 { 96 reqParams.BaseParams = bp 97 reqParams.Path = apc.URLPathClu.S 98 reqParams.Body = cos.MustMarshal(msg) 99 reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}} 100 reqParams.Query = url.Values{apc.QparamWhat: []string{apc.WhatQueryXactStats}} 101 } 102 _, err = reqParams.DoReqAny(&xs) 103 FreeRp(reqParams) 104 return 105 } 106 107 // GetOneXactionStatus queries one of the IC (proxy) members for status 108 // of the `args`-identified xaction. 109 // NOTE: 110 // - is used internally by the WaitForXactionIC() helper function (to wait on xaction) 111 // - returns a single matching xaction or none; 112 // - when the `args` filter "covers" multiple xactions the returned status corresponds to 113 // any matching xaction that's currently running, or - if nothing's running - 114 // the one that's finished most recently, 115 // if exists 116 func GetOneXactionStatus(bp BaseParams, args *xact.ArgsMsg) (status *nl.Status, err error) { 117 status = &nl.Status{} 118 q := url.Values{apc.QparamWhat: []string{apc.WhatOneXactStatus}} 119 err = getxst(status, q, bp, args) 120 return 121 } 122 123 // same as above, except that it returns _all_ matching xactions 124 func GetAllXactionStatus(bp BaseParams, args *xact.ArgsMsg) (matching nl.StatusVec, err error) { 125 q := url.Values{apc.QparamWhat: []string{apc.WhatAllXactStatus}} 126 if args.Force { 127 // (force just-in-time) 128 // for each args-selected xaction: 129 // check if any of the targets delayed updating the corresponding status, 130 // and query those targets directly 131 q.Set(apc.QparamForce, "true") 132 } 133 err = getxst(&matching, q, bp, args) 134 return 135 } 136 137 func getxst(out any, q url.Values, bp BaseParams, args *xact.ArgsMsg) (err error) { 138 bp.Method = http.MethodGet 139 msg := xact.QueryMsg{ID: args.ID, Kind: args.Kind, Bck: args.Bck} 140 if args.OnlyRunning { 141 msg.OnlyRunning = apc.Ptr(true) 142 } 143 reqParams := AllocRp() 144 { 145 reqParams.BaseParams = bp 146 reqParams.Path = apc.URLPathClu.S 147 reqParams.Body = cos.MustMarshal(msg) 148 reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}} 149 reqParams.Query = q 150 } 151 _, err = reqParams.DoReqAny(out) 152 FreeRp(reqParams) 153 return 154 } 155 156 // 157 // TODO: use `xact.IdlesBeforeFinishing` to provide a single unified wait-for API 158 // 159 160 type consIdle struct { 161 xid string 162 cnt int 163 delayed bool 164 } 165 166 func (ci *consIdle) check(snaps xact.MultiSnap) (done, resetProbeFreq bool) { 167 aborted, running, notstarted := snaps.IsIdle(ci.xid) 168 if aborted { 169 return true, false 170 } 171 if running { 172 ci.cnt = 0 173 return false, false 174 } 175 if notstarted && ci.cnt == 0 { 176 if !ci.delayed { 177 time.Sleep(min(2*xact.MinPollTime, 4*time.Second)) 178 ci.delayed = true 179 } 180 return false, false 181 } 182 // is idle 183 ci.cnt++ 184 return ci.cnt >= xact.NumConsecutiveIdle, true 185 } 186 187 // WaitForXactionIdle waits for a given on-demand xaction to be idle. 188 func WaitForXactionIdle(bp BaseParams, args *xact.ArgsMsg) (err error) { 189 ci, running := &consIdle{xid: args.ID}, args.OnlyRunning 190 args.OnlyRunning = true 191 err = WaitForXactionNode(bp, args, ci.check) 192 args.OnlyRunning = running 193 return err 194 } 195 196 // WaitForXactionIC waits for a given xaction to complete. 197 // Use it only for global xactions 198 // (those that execute on all targets and report their status to IC, e.g. rebalance). 199 func WaitForXactionIC(bp BaseParams, args *xact.ArgsMsg) (status *nl.Status, err error) { 200 return _waitx(bp, args, nil) 201 } 202 203 // WaitForXactionNode waits for a given xaction to complete. 204 // Use for xactions that do _not_ report their status to IC members, namely: 205 // - xact.IdlesBeforeFinishing() 206 // - x-resilver (as it usually runs on a single node) 207 func WaitForXactionNode(bp BaseParams, args *xact.ArgsMsg, fn func(xact.MultiSnap) (bool, bool)) error { 208 debug.Assert(args.Kind != "" || xact.IsValidUUID(args.ID)) 209 _, err := _waitx(bp, args, fn) 210 return err 211 } 212 213 // TODO: `status` is currently always nil when we wait with a (`fn`) callback 214 // TODO: un-defer cancel() 215 func _waitx(bp BaseParams, args *xact.ArgsMsg, fn func(xact.MultiSnap) (bool, bool)) (status *nl.Status, err error) { 216 var ( 217 elapsed time.Duration 218 begin = mono.NanoTime() 219 total, maxSleep = _times(args) 220 sleep = xact.MinPollTime 221 ) 222 for { 223 var done bool 224 if fn == nil { 225 status, err = GetOneXactionStatus(bp, args) 226 done = err == nil && status.Finished() && elapsed >= xact.MinPollTime 227 } else { 228 var ( 229 snaps xact.MultiSnap 230 resetProbeFreq bool 231 ) 232 snaps, err = QueryXactionSnaps(bp, args) 233 if err == nil { 234 done, resetProbeFreq = fn(snaps) 235 if resetProbeFreq { 236 sleep = xact.MinPollTime 237 } 238 } 239 } 240 canRetry := err == nil || cos.IsRetriableConnErr(err) || cmn.IsStatusServiceUnavailable(err) 241 if done || !canRetry /*fail*/ { 242 return 243 } 244 time.Sleep(sleep) 245 sleep = min(maxSleep, sleep+sleep/2) 246 247 if elapsed = mono.Since(begin); elapsed >= total { 248 err = fmt.Errorf("api.wait: timed out (%v) waiting for %s", total, args.String()) 249 return 250 } 251 } 252 } 253 254 func _times(args *xact.ArgsMsg) (time.Duration, time.Duration) { 255 total := args.Timeout 256 switch { 257 case args.Timeout == 0: 258 total = xact.DefWaitTimeShort 259 case args.Timeout < 0: 260 total = xact.DefWaitTimeLong 261 } 262 return total, min(xact.MaxProbingFreq, cos.ProbingFrequency(total)) 263 }