github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/transport/api.go (about) 1 // Package transport provides long-lived http/tcp connections for 2 // intra-cluster communications (see README for details and usage example). 3 /* 4 * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 5 */ 6 package transport 7 8 import ( 9 "io" 10 "math" 11 "runtime" 12 "time" 13 "unsafe" 14 15 "github.com/NVIDIA/aistore/api/apc" 16 "github.com/NVIDIA/aistore/cmn" 17 "github.com/NVIDIA/aistore/cmn/atomic" 18 "github.com/NVIDIA/aistore/cmn/cos" 19 "github.com/NVIDIA/aistore/cmn/debug" 20 "github.com/NVIDIA/aistore/hk" 21 "github.com/NVIDIA/aistore/memsys" 22 ) 23 24 /////////////////// 25 // object stream // 26 /////////////////// 27 28 // range of 16 `Obj.Hdr.Opcode` and `Msg.Opcode` values 29 // reserved for _internal_ use 30 const ( 31 opcFin = iota + math.MaxUint16 - 16 32 opcIdleTick 33 ) 34 35 func ReservedOpcode(opc int) bool { return opc >= opcFin } 36 37 const ( 38 SizeUnknown = -1 39 40 dfltSizePDU = memsys.DefaultBufSize 41 maxSizePDU = memsys.MaxPageSlabSize 42 dfltSizeHeader = memsys.PageSize 43 maxSizeHeader = memsys.MaxPageSlabSize 44 ) 45 46 const sizeofh = int(unsafe.Sizeof(Obj{})) 47 48 type ( 49 // advanced usage: additional stream control 50 Extra struct { 51 Callback ObjSentCB // typical usage: to free SGLs, close files, etc. 52 Config *cmn.Config // (to optimize-out GCO.Get()) 53 Compression string // see CompressAlways, etc. enum 54 SenderID string // e.g., xaction ID (optional) 55 IdleTeardown time.Duration // when exceeded, causes PUT to terminate (and to renew upon the very next send) 56 SizePDU int32 // NOTE: 0(zero): no PDUs; must be below maxSizePDU; unknown size _requires_ PDUs 57 MaxHdrSize int32 // overrides `dfltMaxHdr` 58 WorkChBurst int // overrides `dfltBurstNum` 59 } 60 61 // receive-side session stats indexed by session ID (see recv.go for "uid") 62 // optional, currently tests only 63 RxStats map[uint64]*Stats 64 65 // object header 66 ObjHdr struct { 67 Bck cmn.Bck 68 ObjName string 69 SID string // sender node ID 70 Opaque []byte // custom control (optional) 71 ObjAttrs cmn.ObjAttrs // attributes/metadata of the object that's being transmitted 72 Opcode int // (see reserved range above) 73 } 74 // object to transmit 75 Obj struct { 76 Reader io.ReadCloser // reader (to read the object, and close when done) 77 CmplArg any // optional context passed to the ObjSentCB callback 78 Callback ObjSentCB // called when the last byte is sent _or_ when the stream terminates (see term.reason) 79 prc *atomic.Int64 // private; if present, ref-counts so that we call ObjSentCB only once 80 Hdr ObjHdr 81 } 82 83 // object-sent callback that has the following signature can optionally be defined on a: 84 // a) per-stream basis (via NewStream constructor - see Extra struct above) 85 // b) for a given object that is being sent (for instance, to support a call-per-batch semantics) 86 // Naturally, object callback "overrides" the per-stream one: when object callback is defined 87 // (i.e., non-nil), the stream callback is ignored/skipped. 88 // NOTE: if defined, the callback executes asynchronously as far as the sending part is concerned 89 ObjSentCB func(*ObjHdr, io.ReadCloser, any, error) 90 91 Msg struct { 92 SID string 93 Body []byte 94 Opcode int 95 } 96 97 // stream collector 98 StreamCollector struct{} 99 100 // Rx callbacks 101 RecvObj func(hdr *ObjHdr, objReader io.Reader, err error) error 102 RecvMsg func(msg Msg, err error) error 103 ) 104 105 /////////////////// 106 // object stream // 107 /////////////////// 108 109 func NewObjStream(client Client, dstURL, dstID string, extra *Extra) (s *Stream) { 110 if extra == nil { 111 extra = &Extra{Config: cmn.GCO.Get()} 112 } else if extra.Config == nil { 113 extra.Config = cmn.GCO.Get() 114 } 115 s = &Stream{streamBase: *newBase(client, dstURL, dstID, extra)} 116 s.streamBase.streamer = s 117 s.callback = extra.Callback 118 if extra.Compressed() { 119 s.initCompression(extra) 120 } 121 debug.Assert(s.usePDU() == extra.UsePDU()) 122 123 chsize := burst(extra) // num objects the caller can post without blocking 124 s.workCh = make(chan *Obj, chsize) // Send Qeueue (SQ) 125 s.cmplCh = make(chan cmpl, chsize) // Send Completion Queue (SCQ) 126 127 s.wg.Add(2) 128 go s.sendLoop(dryrun()) // handle SQ 129 go s.cmplLoop() // handle SCQ 130 131 gc.ctrlCh <- ctrl{&s.streamBase, true /* collect */} 132 return 133 } 134 135 // Asynchronously send an object (transport.Obj) defined by its header and its reader. 136 // 137 // The sending pipeline is implemented as a pair (SQ, SCQ) where the former is a send 138 // queue realized as workCh, and the latter is a send completion queue (cmplCh). 139 // Together SQ and SCQ form a FIFO. 140 // 141 // - header-only objects are supported; when there's no data to send (that is, 142 // when the header's Dsize field is set to zero), the reader is not required and the 143 // corresponding argument in Send() can be set to nil. 144 // - object reader is *always* closed irrespectively of whether the Send() succeeds 145 // or fails. On success, if send-completion (ObjSentCB) callback is provided 146 // (i.e., non-nil), the closing is done by doCmpl(). 147 // - Optional reference counting is also done by (and in) the doCmpl, so that the 148 // ObjSentCB gets called if and only when the refcount (if provided i.e., non-nil) 149 // reaches zero. 150 // - For every transmission of every object there's always an doCmpl() completion 151 // (with its refcounting and reader-closing). This holds true in all cases including 152 // network errors that may cause sudden and instant termination of the underlying 153 // stream(s). 154 func (s *Stream) Send(obj *Obj) (err error) { 155 debug.Assertf(len(obj.Hdr.Opaque) < len(s.maxhdr)-sizeofh, "(%d, %d)", len(obj.Hdr.Opaque), len(s.maxhdr)) 156 if err = s.startSend(obj); err != nil { 157 s.doCmpl(obj, err) // take a shortcut 158 return 159 } 160 161 s.workCh <- obj 162 if l, c := len(s.workCh), cap(s.workCh); l > c/2 { 163 runtime.Gosched() // poor man's throttle 164 if l == c { 165 s.chanFull.Inc() 166 } 167 } 168 return 169 } 170 171 func (s *Stream) Fin() { 172 _ = s.Send(&Obj{Hdr: ObjHdr{Opcode: opcFin}}) 173 s.wg.Wait() 174 } 175 176 ////////////////////// 177 // receive-side API // 178 ////////////////////// 179 180 func Handle(trname string, rxObj RecvObj, withStats ...bool) error { 181 var h handler 182 if len(withStats) > 0 && withStats[0] { 183 hkName := ObjURLPath(trname) 184 hex := &hdlExtra{hdl: hdl{trname: trname, rxObj: rxObj}, hkName: hkName} 185 hk.Reg(hkName+hk.NameSuffix, hex.cleanup, sessionIsOld) 186 h = hex 187 } else { 188 h = &hdl{trname: trname, rxObj: rxObj} 189 } 190 return oput(trname, h) 191 } 192 193 func Unhandle(trname string) error { return odel(trname) } 194 195 //////////////////// 196 // stats and misc // 197 //////////////////// 198 199 func ObjURLPath(trname string) string { return _urlPath(apc.ObjStream, trname) } 200 201 func _urlPath(endp, trname string) string { 202 if trname == "" { 203 return cos.JoinWords(apc.Version, endp) 204 } 205 return cos.JoinWords(apc.Version, endp, trname) 206 } 207 208 func GetRxStats() (netstats map[string]RxStats) { 209 netstats = make(map[string]RxStats) 210 for i, hmap := range hmaps { 211 hmtxs[i].Lock() 212 for trname, h := range hmap { 213 if s := h.getStats(); s != nil { 214 netstats[trname] = s 215 } 216 } 217 hmtxs[i].Unlock() 218 } 219 return 220 }