// Package transport provides long-lived http/tcp connections for
// intra-cluster communications (see README for details and usage example).
/*
 * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
 */
package transport

import (
	"fmt"
	"io"
	"runtime"

	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/cmn/debug"
	"github.com/NVIDIA/aistore/cmn/nlog"
	"github.com/NVIDIA/aistore/memsys"
	"github.com/pierrec/lz4/v3"
)

// object stream & private types
type (
	Stream struct {
		workCh   chan *Obj // aka SQ: next object to stream
		cmplCh   chan cmpl // aka SCQ; note that SQ and SCQ together form a FIFO
		callback ObjSentCB // to free SGLs, close files, etc.
		sendoff  sendoff
		lz4s     lz4Stream
		streamBase
	}
	lz4Stream struct {
		s            *Stream
		zw           *lz4.Writer // orig reader => zw
		sgl          *memsys.SGL // zw => bb => network
		blockMaxSize int         // *uncompressed* block max size
		frameChecksum bool       // true: checksum lz4 frames
	}
	// sendoff tracks the object currently being transmitted:
	// the object itself, the current offset within header/data,
	// and the in-send state (inHdr | inPDU | inData | inEOB).
	sendoff struct {
		obj Obj
		off int64
		ins int // in-send enum
	}
	// cmpl is a completion record posted to cmplCh (SCQ).
	cmpl struct {
		err error
		obj Obj
	}
)

// interface guard
var _ streamer = (*Stream)(nil)

///////////////////
// object stream //
///////////////////

// terminate finalizes the stream exactly once (asserted via term.done CAS):
// records the first non-nil (err, reason) pair under term.mu, stops the
// stream, posts the final opcFin completion to SCQ, removes the stream from
// the gc collector, and frees lz4 resources when compression is on.
// Returns the recorded reason and error.
func (s *Stream) terminate(err error, reason string) (actReason string, actErr error) {
	ok := s.term.done.CAS(false, true)
	debug.Assert(ok, s.String())

	s.term.mu.Lock()
	// keep the first-recorded error and reason; later calls don't overwrite
	if s.term.err == nil {
		s.term.err = err
	}
	if s.term.reason == "" {
		s.term.reason = reason
	}
	s.Stop()
	err = s.term.err
	actReason, actErr = s.term.reason, s.term.err
	// fin marker tells cmplLoop to exit
	s.cmplCh <- cmpl{err, Obj{Hdr: ObjHdr{Opcode: opcFin}}}
	s.term.mu.Unlock()

	// Remove stream after lock because we could deadlock between `do()`
	// (which checks for `Terminated` status) and this function which
	// would be under lock.
	gc.remove(&s.streamBase)

	if s.compressed() {
		s.lz4s.sgl.Free()
		if s.lz4s.zw != nil {
			s.lz4s.zw.Reset(nil)
		}
	}
	return
}

// initCompression wires up the lz4 side of the stream from config:
// block size, frame checksumming, and a staging SGL sized to the
// (capped) lz4 block max size.
func (s *Stream) initCompression(extra *Extra) {
	s.lz4s.s = s
	s.lz4s.blockMaxSize = int(extra.Config.Transport.LZ4BlockMaxSize)
	s.lz4s.frameChecksum = extra.Config.Transport.LZ4FrameChecksum
	if s.lz4s.blockMaxSize >= memsys.MaxPageSlabSize {
		s.lz4s.sgl = g.mm.NewSGL(memsys.MaxPageSlabSize, memsys.MaxPageSlabSize)
	} else {
		s.lz4s.sgl = g.mm.NewSGL(cos.KiB*64, cos.KiB*64)
	}
	s.lid = fmt.Sprintf("%s[%d[%s]]", s.trname, s.sessID, cos.ToSizeIEC(int64(s.lz4s.blockMaxSize), 0))
}

// compressed reports whether this stream was initialized for lz4
// (initCompression sets lz4s.s = s; it is left nil otherwise).
func (s *Stream) compressed() bool { return s.lz4s.s == s }
func (s *Stream) usePDU() bool     { return s.pdu != nil }

func (s *Stream) resetCompression() {
	s.lz4s.sgl.Reset()
	s.lz4s.zw.Reset(nil)
}

// cmplLoop drains SCQ, running doCmpl for each completed object,
// until the channel closes or the fin marker (opcFin) arrives.
func (s *Stream) cmplLoop() {
	for {
		cmpl, ok := <-s.cmplCh
		obj := &cmpl.obj
		if !ok || obj.Hdr.isFin() {
			break
		}
		s.doCmpl(&cmpl.obj, cmpl.err)
	}
	s.wg.Done()
}

// handle the last interrupted transmission and pending SQ/SCQ
// (ranges over the channels, so both must already be closed by the caller)
func (s *Stream) abortPending(err error, completions bool) {
	for obj := range s.workCh {
		s.doCmpl(obj, err)
	}
	if completions {
		for cmpl := range s.cmplCh {
			if !cmpl.obj.Hdr.isFin() {
				s.doCmpl(&cmpl.obj, cmpl.err)
			}
		}
	}
}

// refcount to invoke the has-been-sent callback only once
// and *always* close the reader (sic!)
func (s *Stream) doCmpl(obj *Obj, err error) {
	var rc int64
	if obj.prc != nil {
		rc = obj.prc.Dec() // shared refcount across streams (multi-target send)
		debug.Assert(rc >= 0)
	}
	if obj.Reader != nil {
		if err != nil && cmn.IsFileAlreadyClosed(err) {
			nlog.Errorf("%s %s: %v", s, obj, err)
		} else {
			cos.Close(obj.Reader) // otherwise, always closing
		}
	}
	// SCQ completion callback: per-object callback wins over the stream-wide one;
	// fires only when the refcount drops to zero (or there is no refcount at all)
	if rc == 0 {
		if obj.Callback != nil {
			obj.Callback(&obj.Hdr, obj.Reader, obj.CmplArg, err)
		} else if s.callback != nil {
			s.callback(&obj.Hdr, obj.Reader, obj.CmplArg, err)
		}
	}
	freeSend(obj)
}

// doRequest resets per-request counters and runs one send request,
// either directly (s as the body reader) or through the lz4 wrapper.
func (s *Stream) doRequest() error {
	s.numCur, s.sizeCur = 0, 0
	if !s.compressed() {
		return s.do(s)
	}
	s.lz4s.sgl.Reset()
	if s.lz4s.zw == nil {
		s.lz4s.zw = lz4.NewWriter(s.lz4s.sgl)
	} else {
		s.lz4s.zw.Reset(s.lz4s.sgl)
	}
	// lz4 framing spec at http://fastcompression.blogspot.com/2013/04/lz4-streaming-format-final.html
	s.lz4s.zw.Header.BlockChecksum = false
	s.lz4s.zw.Header.NoChecksum = !s.lz4s.frameChecksum
	s.lz4s.zw.Header.BlockMaxSize = s.lz4s.blockMaxSize
	return s.do(&s.lz4s)
}

// as io.Reader
// Read is the stream's transmit-side state machine: depending on sendoff.ins
// it continues the in-flight header/PDU/data, or (repeat:) pulls the next
// object from SQ (workCh), deactivates on an idle tick, and stops on stopCh.
func (s *Stream) Read(b []byte) (n int, err error) {
	s.time.inSend.Store(true) // for collector to delay cleanup
	if !s.inSend() {          // true when transmitting s.sendoff.obj
		goto repeat
	}
	switch s.sendoff.ins {
	case inData:
		obj := &s.sendoff.obj
		if !obj.IsHeaderOnly() {
			return s.sendData(b)
		}
		if obj.Hdr.isFin() {
			err = io.EOF
			return
		}
		// header-only object: complete it and fall through to pick the next one
		s.eoObj(nil)
	case inPDU:
		for !s.pdu.done {
			err = s.pdu.readFrom(&s.sendoff)
			if s.pdu.done {
				s.pdu.insHeader()
				break
			}
		}
		if s.pdu.rlength() > 0 {
			n = s.sendPDU(b)
			if s.pdu.rlength() == 0 {
				s.sendoff.off += int64(s.pdu.slength())
				if s.pdu.last {
					s.eoObj(nil)
				}
				s.pdu.reset()
			}
		}
		return
	case inHdr:
		return s.sendHdr(b)
	}

repeat:
	select {
	case obj, ok := <-s.workCh: // next object OR idle tick
		if !ok {
			err = fmt.Errorf("%s closed prior to stopping", s)
			nlog.Warningln(err)
			return
		}
		s.sendoff.obj = *obj
		obj = &s.sendoff.obj
		if obj.Hdr.isIdleTick() {
			// ignore a stale idle tick if real work arrived in the meantime
			if len(s.workCh) > 0 {
				goto repeat
			}
			return s.deactivate()
		}
		l := insObjHeader(s.maxhdr, &obj.Hdr, s.usePDU())
		s.header = s.maxhdr[:l]
		s.sendoff.ins = inHdr
		return s.sendHdr(b)
	case <-s.stopCh.Listen():
		if verbose {
			nlog.Infof("%s: stopped (%d/%d)", s, s.numCur, s.stats.Num.Load())
		}
		err = io.EOF
		return
	}
}

// sendHdr copies the serialized object header into b, possibly across
// multiple Read calls; once fully sent, transitions to inPDU or inData
// (and signals EOF + lastCh on the fin marker).
func (s *Stream) sendHdr(b []byte) (n int, err error) {
	n = copy(b, s.header[s.sendoff.off:])
	s.sendoff.off += int64(n)
	if s.sendoff.off < int64(len(s.header)) {
		return // header partially sent; continue on the next Read
	}
	debug.Assert(s.sendoff.off == int64(len(s.header)))
	s.stats.Offset.Add(s.sendoff.off)
	if verbose {
		num := s.stats.Num.Load()
		nlog.Infof("%s: hlen=%d (%d/%d)", s, s.sendoff.off, s.numCur, num)
	}
	obj := &s.sendoff.obj
	if s.usePDU() && !obj.IsHeaderOnly() {
		s.sendoff.ins = inPDU
	} else {
		s.sendoff.ins = inData
	}
	s.sendoff.off = 0
	if obj.Hdr.isFin() {
		if verbose {
			nlog.Infof("%s: sent last", s)
		}
		err = io.EOF
		s.lastCh.Close()
	}
	return
}

// sendData streams the object payload from its Reader into b,
// completing the object (eoObj) on EOF or when the size is reached;
// a short read (EOF before obj.Size()) is an error.
func (s *Stream) sendData(b []byte) (n int, err error) {
	var (
		obj     = &s.sendoff.obj
		objSize = obj.Size()
	)
	n, err = obj.Reader.Read(b)
	s.sendoff.off += int64(n)
	if err != nil {
		if err == io.EOF {
			if s.sendoff.off < objSize {
				return n, fmt.Errorf("%s: read (%d) shorter than size (%d)", s, s.sendoff.off, objSize)
			}
			err = nil
		}
		s.eoObj(err)
	} else if s.sendoff.off >= objSize {
		s.eoObj(err)
	}
	return
}

func (s *Stream) sendPDU(b []byte) (n int) {
	n = s.pdu.read(b)
	return
}

// end-of-object:
// - update stats,
reset idle timeout, and post completion 302 // - note that reader.Close() is done by `doCmpl` 303 // TODO: ideally, there's a way to flush buffered data to the underlying connection :NOTE 304 func (s *Stream) eoObj(err error) { 305 obj := &s.sendoff.obj 306 objSize := obj.Size() 307 if obj.IsUnsized() { 308 objSize = s.sendoff.off 309 } 310 s.sizeCur += s.sendoff.off 311 s.stats.Offset.Add(s.sendoff.off) 312 if err != nil { 313 goto exit 314 } 315 if s.sendoff.off != objSize { 316 err = fmt.Errorf("%s: %s offset %d != size", s, obj, s.sendoff.off) 317 goto exit 318 } 319 // this stream stats 320 s.stats.Size.Add(objSize) 321 s.numCur++ 322 s.stats.Num.Inc() 323 if verbose { 324 nlog.Infof("%s: sent %s (%d/%d)", s, obj, s.numCur, s.stats.Num.Load()) 325 } 326 327 // target stats 328 g.tstats.Inc(OutObjCount) 329 g.tstats.Add(OutObjSize, objSize) 330 exit: 331 if err != nil { 332 nlog.Errorln(err) 333 } 334 335 // next completion => SCQ 336 s.cmplCh <- cmpl{err, s.sendoff.obj} 337 s.sendoff = sendoff{ins: inEOB} 338 } 339 340 func (s *Stream) inSend() bool { return s.sendoff.ins >= inHdr || s.sendoff.ins < inEOB } 341 342 func (s *Stream) dryrun() { 343 var ( 344 body = io.NopCloser(s) 345 h = &hdl{trname: s.trname} 346 it = iterator{handler: h, body: body, hbuf: make([]byte, dfltMaxHdr)} 347 ) 348 for { 349 hlen, flags, err := it.nextProtoHdr(s.String()) 350 if err == io.EOF { 351 break 352 } 353 debug.AssertNoErr(err) 354 debug.Assert(flags&msgFl == 0) 355 obj, err := it.nextObj(s.String(), hlen) 356 if obj != nil { 357 cos.DrainReader(obj) // TODO: recycle `objReader` here 358 continue 359 } 360 if err != nil { 361 break 362 } 363 } 364 } 365 366 func (s *Stream) errCmpl(err error) { 367 if s.inSend() { 368 s.cmplCh <- cmpl{err, s.sendoff.obj} 369 } 370 } 371 372 // gc: drain terminated stream 373 func (s *Stream) drain(err error) { 374 for { 375 select { 376 case obj := <-s.workCh: 377 s.doCmpl(obj, err) 378 default: 379 return 380 } 381 } 382 } 383 384 // gc: 
385 func (s *Stream) closeAndFree() { 386 close(s.workCh) 387 close(s.cmplCh) 388 389 g.mm.Free(s.maxhdr) 390 if s.pdu != nil { 391 s.pdu.free(g.mm) 392 } 393 } 394 395 // gc: post idle tick if idle 396 func (s *Stream) idleTick() { 397 if len(s.workCh) == 0 && s.sessST.CAS(active, inactive) { 398 s.workCh <- &Obj{Hdr: ObjHdr{Opcode: opcIdleTick}} 399 if verbose { 400 nlog.Infof("%s: active => inactive", s) 401 } 402 } 403 } 404 405 /////////// 406 // Stats // 407 /////////// 408 409 func (stats *Stats) CompressionRatio() float64 { 410 bytesRead := stats.Offset.Load() 411 bytesSent := stats.CompressedSize.Load() 412 return float64(bytesRead) / float64(bytesSent) 413 } 414 415 /////////////// 416 // lz4Stream // 417 /////////////// 418 419 func (lz4s *lz4Stream) Read(b []byte) (n int, err error) { 420 var ( 421 sendoff = &lz4s.s.sendoff 422 last = sendoff.obj.Hdr.isFin() 423 retry = maxInReadRetries // insist on returning n > 0 (note that lz4 compresses /blocks/) 424 ) 425 if lz4s.sgl.Len() > 0 { 426 lz4s.zw.Flush() 427 n, err = lz4s.sgl.Read(b) 428 if err == io.EOF { // reusing/rewinding this buf multiple times 429 err = nil 430 } 431 goto ex 432 } 433 re: 434 n, err = lz4s.s.Read(b) 435 _, _ = lz4s.zw.Write(b[:n]) 436 if last { 437 lz4s.zw.Flush() 438 retry = 0 439 } else if lz4s.s.sendoff.ins == inEOB || err != nil { 440 lz4s.zw.Flush() 441 retry = 0 442 } 443 n, _ = lz4s.sgl.Read(b) 444 if n == 0 { 445 if retry > 0 { 446 retry-- 447 runtime.Gosched() 448 goto re 449 } 450 lz4s.zw.Flush() 451 n, _ = lz4s.sgl.Read(b) 452 } 453 ex: 454 lz4s.s.stats.CompressedSize.Add(int64(n)) 455 if lz4s.sgl.Len() == 0 { 456 lz4s.sgl.Reset() 457 } 458 if last && err == nil { 459 err = io.EOF 460 } 461 return 462 }