github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/transport/sendobj.go

     1  // Package transport provides long-lived http/tcp connections for
     2  // intra-cluster communications (see README for details and usage example).
     3  /*
     4   * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
     5   */
     6  package transport
     7  
     8  import (
     9  	"fmt"
    10  	"io"
    11  	"runtime"
    12  
    13  	"github.com/NVIDIA/aistore/cmn"
    14  	"github.com/NVIDIA/aistore/cmn/cos"
    15  	"github.com/NVIDIA/aistore/cmn/debug"
    16  	"github.com/NVIDIA/aistore/cmn/nlog"
    17  	"github.com/NVIDIA/aistore/memsys"
    18  	"github.com/pierrec/lz4/v3"
    19  )
    20  
    21  // object stream & private types
    22  type (
    23  	Stream struct {
    24  		workCh   chan *Obj // aka SQ: next object to stream
    25  		cmplCh   chan cmpl // aka SCQ; note that SQ and SCQ together form a FIFO
    26  		callback ObjSentCB // to free SGLs, close files, etc.
    27  		sendoff  sendoff
    28  		lz4s     lz4Stream
    29  		streamBase
    30  	}
    31  	lz4Stream struct {
    32  		s             *Stream
    33  		zw            *lz4.Writer // orig reader => zw
    34  		sgl           *memsys.SGL // zw => sgl => network
    35  		blockMaxSize  int         // *uncompressed* block max size
    36  		frameChecksum bool        // true: checksum lz4 frames
    37  	}
    38  	sendoff struct {
    39  		obj Obj
    40  		off int64
    41  		ins int // in-send enum
    42  	}
    43  	cmpl struct {
    44  		err error
    45  		obj Obj
    46  	}
    47  )
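
        // The SQ/SCQ pairing noted above can be reduced to a minimal sketch: every
        // object taken off the send queue (workCh) eventually yields exactly one
        // completion on the completion queue (cmplCh), in send order. The `transmit`
        // argument below is a hypothetical stand-in for the actual wire transfer and
        // is not part of this package.
        func sketchSendLoop(workCh chan *Obj, cmplCh chan cmpl, transmit func(*Obj) error) {
        	for obj := range workCh {
        		err := transmit(obj)      // send header and payload (stand-in)
        		cmplCh <- cmpl{err, *obj} // SCQ preserves SQ (FIFO) order
        	}
        }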
    48  
    49  // interface guard
    50  var _ streamer = (*Stream)(nil)
    51  
    52  ///////////////////
    53  // object stream //
    54  ///////////////////
    55  
    56  func (s *Stream) terminate(err error, reason string) (actReason string, actErr error) {
    57  	ok := s.term.done.CAS(false, true)
    58  	debug.Assert(ok, s.String())
    59  
    60  	s.term.mu.Lock()
    61  	if s.term.err == nil {
    62  		s.term.err = err
    63  	}
    64  	if s.term.reason == "" {
    65  		s.term.reason = reason
    66  	}
    67  	s.Stop()
    68  	err = s.term.err
    69  	actReason, actErr = s.term.reason, s.term.err
    70  	s.cmplCh <- cmpl{err, Obj{Hdr: ObjHdr{Opcode: opcFin}}}
    71  	s.term.mu.Unlock()
    72  
    73  	// Remove the stream only after releasing the lock: otherwise we could
    74  	// deadlock between `do()` (which checks the `Terminated` status) and
    75  	// this function, which holds the lock.
    76  	gc.remove(&s.streamBase)
    77  
    78  	if s.compressed() {
    79  		s.lz4s.sgl.Free()
    80  		if s.lz4s.zw != nil {
    81  			s.lz4s.zw.Reset(nil)
    82  		}
    83  	}
    84  	return
    85  }
    86  
    87  func (s *Stream) initCompression(extra *Extra) {
    88  	s.lz4s.s = s
    89  	s.lz4s.blockMaxSize = int(extra.Config.Transport.LZ4BlockMaxSize)
    90  	s.lz4s.frameChecksum = extra.Config.Transport.LZ4FrameChecksum
    91  	if s.lz4s.blockMaxSize >= memsys.MaxPageSlabSize {
    92  		s.lz4s.sgl = g.mm.NewSGL(memsys.MaxPageSlabSize, memsys.MaxPageSlabSize)
    93  	} else {
    94  		s.lz4s.sgl = g.mm.NewSGL(cos.KiB*64, cos.KiB*64)
    95  	}
    96  	s.lid = fmt.Sprintf("%s[%d[%s]]", s.trname, s.sessID, cos.ToSizeIEC(int64(s.lz4s.blockMaxSize), 0))
    97  }
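
        // For illustration: the stream label set above embeds the *uncompressed* lz4
        // block max size. With a hypothetical transfer name "trname", session ID 7,
        // and a 256KiB block size the label renders roughly as "trname[7[256KiB]]":
        func sketchLZ4Label() string {
        	return fmt.Sprintf("%s[%d[%s]]", "trname", 7, cos.ToSizeIEC(256*cos.KiB, 0))
        }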
    98  
    99  func (s *Stream) compressed() bool { return s.lz4s.s == s }
   100  func (s *Stream) usePDU() bool     { return s.pdu != nil }
   101  
   102  func (s *Stream) resetCompression() {
   103  	s.lz4s.sgl.Reset()
   104  	s.lz4s.zw.Reset(nil)
   105  }
   106  
   107  func (s *Stream) cmplLoop() {
   108  	for {
   109  		cmpl, ok := <-s.cmplCh
   110  		obj := &cmpl.obj
   111  		if !ok || obj.Hdr.isFin() {
   112  			break
   113  		}
   114  		s.doCmpl(&cmpl.obj, cmpl.err)
   115  	}
   116  	s.wg.Done()
   117  }
   118  
   119  // handle the last interrupted transmission and pending SQ/SCQ
   120  func (s *Stream) abortPending(err error, completions bool) {
   121  	for obj := range s.workCh {
   122  		s.doCmpl(obj, err)
   123  	}
   124  	if completions {
   125  		for cmpl := range s.cmplCh {
   126  			if !cmpl.obj.Hdr.isFin() {
   127  				s.doCmpl(&cmpl.obj, cmpl.err)
   128  			}
   129  		}
   130  	}
   131  }
   132  
   133  // refcount to invoke the has-been-sent callback only once
   134  // and *always* close the reader (sic!)
   135  func (s *Stream) doCmpl(obj *Obj, err error) {
   136  	var rc int64
   137  	if obj.prc != nil {
   138  		rc = obj.prc.Dec()
   139  		debug.Assert(rc >= 0)
   140  	}
   141  	if obj.Reader != nil {
   142  		if err != nil && cmn.IsFileAlreadyClosed(err) {
   143  			nlog.Errorf("%s %s: %v", s, obj, err)
   144  		} else {
   145  			cos.Close(obj.Reader) // otherwise, always closing
   146  		}
   147  	}
   148  	// SCQ completion callback
   149  	if rc == 0 {
   150  		if obj.Callback != nil {
   151  			obj.Callback(&obj.Hdr, obj.Reader, obj.CmplArg, err)
   152  		} else if s.callback != nil {
   153  			s.callback(&obj.Hdr, obj.Reader, obj.CmplArg, err)
   154  		}
   155  	}
   156  	freeSend(obj)
   157  }
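
        // Completion-callback precedence in doCmpl above: a per-object Callback, when
        // set, takes precedence over the stream-wide one. The same choice in isolation
        // (streamCB stands in for s.callback; illustrative only):
        func sketchInvokeCallback(obj *Obj, streamCB ObjSentCB, err error) {
        	switch {
        	case obj.Callback != nil:
        		obj.Callback(&obj.Hdr, obj.Reader, obj.CmplArg, err)
        	case streamCB != nil:
        		streamCB(&obj.Hdr, obj.Reader, obj.CmplArg, err)
        	}
        }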
   158  
   159  func (s *Stream) doRequest() error {
   160  	s.numCur, s.sizeCur = 0, 0
   161  	if !s.compressed() {
   162  		return s.do(s)
   163  	}
   164  	s.lz4s.sgl.Reset()
   165  	if s.lz4s.zw == nil {
   166  		s.lz4s.zw = lz4.NewWriter(s.lz4s.sgl)
   167  	} else {
   168  		s.lz4s.zw.Reset(s.lz4s.sgl)
   169  	}
   170  	// lz4 framing spec at http://fastcompression.blogspot.com/2013/04/lz4-streaming-format-final.html
   171  	s.lz4s.zw.Header.BlockChecksum = false
   172  	s.lz4s.zw.Header.NoChecksum = !s.lz4s.frameChecksum
   173  	s.lz4s.zw.Header.BlockMaxSize = s.lz4s.blockMaxSize
   174  	return s.do(&s.lz4s)
   175  }
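
        // The lz4 framing configured above can be exercised in isolation. Below is a
        // minimal compress/decompress round-trip sketch with the same writer knobs;
        // the 64KiB block size is an arbitrary choice and the SGL stands in for the
        // network buffer (not part of the original code):
        func sketchLZ4RoundTrip(payload []byte) ([]byte, error) {
        	sgl := g.mm.NewSGL(cos.KiB*64, cos.KiB*64)
        	defer sgl.Free()

        	zw := lz4.NewWriter(sgl)
        	zw.Header.BlockChecksum = false
        	zw.Header.NoChecksum = false // i.e., do checksum lz4 frames
        	zw.Header.BlockMaxSize = 64 * 1024
        	if _, err := zw.Write(payload); err != nil {
        		return nil, err
        	}
        	if err := zw.Close(); err != nil { // flush and write the end-of-frame marker
        		return nil, err
        	}
        	return io.ReadAll(lz4.NewReader(sgl))
        }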
   176  
   177  // as io.Reader
   178  func (s *Stream) Read(b []byte) (n int, err error) {
   179  	s.time.inSend.Store(true) // for collector to delay cleanup
   180  	if !s.inSend() {          // true when transmitting s.sendoff.obj
   181  		goto repeat
   182  	}
   183  	switch s.sendoff.ins {
   184  	case inData:
   185  		obj := &s.sendoff.obj
   186  		if !obj.IsHeaderOnly() {
   187  			return s.sendData(b)
   188  		}
   189  		if obj.Hdr.isFin() {
   190  			err = io.EOF
   191  			return
   192  		}
   193  		s.eoObj(nil)
   194  	case inPDU:
   195  		for !s.pdu.done {
   196  			err = s.pdu.readFrom(&s.sendoff)
   197  			if s.pdu.done {
   198  				s.pdu.insHeader()
   199  				break
   200  			}
   201  		}
   202  		if s.pdu.rlength() > 0 {
   203  			n = s.sendPDU(b)
   204  			if s.pdu.rlength() == 0 {
   205  				s.sendoff.off += int64(s.pdu.slength())
   206  				if s.pdu.last {
   207  					s.eoObj(nil)
   208  				}
   209  				s.pdu.reset()
   210  			}
   211  		}
   212  		return
   213  	case inHdr:
   214  		return s.sendHdr(b)
   215  	}
   216  repeat:
   217  	select {
   218  	case obj, ok := <-s.workCh: // next object OR idle tick
   219  		if !ok {
   220  			err = fmt.Errorf("%s closed prior to stopping", s)
   221  			nlog.Warningln(err)
   222  			return
   223  		}
   224  		s.sendoff.obj = *obj
   225  		obj = &s.sendoff.obj
   226  		if obj.Hdr.isIdleTick() {
   227  			if len(s.workCh) > 0 {
   228  				goto repeat
   229  			}
   230  			return s.deactivate()
   231  		}
   232  		l := insObjHeader(s.maxhdr, &obj.Hdr, s.usePDU())
   233  		s.header = s.maxhdr[:l]
   234  		s.sendoff.ins = inHdr
   235  		return s.sendHdr(b)
   236  	case <-s.stopCh.Listen():
   237  		if verbose {
   238  			nlog.Infof("%s: stopped (%d/%d)", s, s.numCur, s.stats.Num.Load())
   239  		}
   240  		err = io.EOF
   241  		return
   242  	}
   243  }
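
        // The transmit state machine implemented by Read above, reduced to its state
        // transitions for the PDU-less case (purely illustrative):
        //
        //	next object <- workCh => inHdr => (header sent) => inData => (payload sent) => inEOB
        func sketchNextState(ins int, headerDone, payloadDone bool) int {
        	switch ins {
        	case inHdr:
        		if headerDone {
        			return inData
        		}
        	case inData:
        		if payloadDone {
        			return inEOB
        		}
        	}
        	return ins
        }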
   244  
   245  func (s *Stream) sendHdr(b []byte) (n int, err error) {
   246  	n = copy(b, s.header[s.sendoff.off:])
   247  	s.sendoff.off += int64(n)
   248  	if s.sendoff.off < int64(len(s.header)) {
   249  		return
   250  	}
   251  	debug.Assert(s.sendoff.off == int64(len(s.header)))
   252  	s.stats.Offset.Add(s.sendoff.off)
   253  	if verbose {
   254  		num := s.stats.Num.Load()
   255  		nlog.Infof("%s: hlen=%d (%d/%d)", s, s.sendoff.off, s.numCur, num)
   256  	}
   257  	obj := &s.sendoff.obj
   258  	if s.usePDU() && !obj.IsHeaderOnly() {
   259  		s.sendoff.ins = inPDU
   260  	} else {
   261  		s.sendoff.ins = inData
   262  	}
   263  	s.sendoff.off = 0
   264  	if obj.Hdr.isFin() {
   265  		if verbose {
   266  			nlog.Infof("%s: sent last", s)
   267  		}
   268  		err = io.EOF
   269  		s.lastCh.Close()
   270  	}
   271  	return
   272  }
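
        // sendHdr above is a plain "copy from an offset" into the caller's buffer,
        // repeated across Read calls until the entire header has been handed out:
        func sketchCopyFrom(dst, src []byte, off int64) (n int, done bool) {
        	n = copy(dst, src[off:])
        	return n, off+int64(n) == int64(len(src))
        }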
   273  
   274  func (s *Stream) sendData(b []byte) (n int, err error) {
   275  	var (
   276  		obj     = &s.sendoff.obj
   277  		objSize = obj.Size()
   278  	)
   279  	n, err = obj.Reader.Read(b)
   280  	s.sendoff.off += int64(n)
   281  	if err != nil {
   282  		if err == io.EOF {
   283  			if s.sendoff.off < objSize {
   284  				return n, fmt.Errorf("%s: read (%d) shorter than size (%d)", s, s.sendoff.off, objSize)
   285  			}
   286  			err = nil
   287  		}
   288  		s.eoObj(err)
   289  	} else if s.sendoff.off >= objSize {
   290  		s.eoObj(err)
   291  	}
   292  	return
   293  }
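
        // sendData above treats io.EOF from the object reader as an error when it
        // arrives before the declared size has been transmitted. The same check in
        // isolation (hypothetical helper):
        func sketchCheckShortRead(off, size int64, err error) error {
        	if err == io.EOF {
        		if off < size {
        			return fmt.Errorf("read (%d) shorter than size (%d)", off, size)
        		}
        		return nil // clean EOF exactly at the declared size
        	}
        	return err
        }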
   294  
   295  func (s *Stream) sendPDU(b []byte) (n int) {
   296  	n = s.pdu.read(b)
   297  	return
   298  }
   299  
   300  // end-of-object:
   301  // - update stats, reset idle timeout, and post completion
   302  // - note that reader.Close() is done by `doCmpl`
   303  // TODO: ideally, there would be a way to flush buffered data to the underlying connection here
   304  func (s *Stream) eoObj(err error) {
   305  	obj := &s.sendoff.obj
   306  	objSize := obj.Size()
   307  	if obj.IsUnsized() {
   308  		objSize = s.sendoff.off
   309  	}
   310  	s.sizeCur += s.sendoff.off
   311  	s.stats.Offset.Add(s.sendoff.off)
   312  	if err != nil {
   313  		goto exit
   314  	}
   315  	if s.sendoff.off != objSize {
   316  		err = fmt.Errorf("%s: %s offset %d != size", s, obj, s.sendoff.off)
   317  		goto exit
   318  	}
   319  	// this stream stats
   320  	s.stats.Size.Add(objSize)
   321  	s.numCur++
   322  	s.stats.Num.Inc()
   323  	if verbose {
   324  		nlog.Infof("%s: sent %s (%d/%d)", s, obj, s.numCur, s.stats.Num.Load())
   325  	}
   326  
   327  	// target stats
   328  	g.tstats.Inc(OutObjCount)
   329  	g.tstats.Add(OutObjSize, objSize)
   330  exit:
   331  	if err != nil {
   332  		nlog.Errorln(err)
   333  	}
   334  
   335  	// next completion => SCQ
   336  	s.cmplCh <- cmpl{err, s.sendoff.obj}
   337  	s.sendoff = sendoff{ins: inEOB}
   338  }
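
        // For unsized objects (IsUnsized above) the size added to the stream and
        // target stats is simply the number of bytes that ended up being transmitted:
        func sketchAccountedSize(declared, transmitted int64, unsized bool) int64 {
        	if unsized {
        		return transmitted
        	}
        	return declared
        }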
   339  
   340  func (s *Stream) inSend() bool { return s.sendoff.ins >= inHdr || s.sendoff.ins < inEOB }
   341  
   342  func (s *Stream) dryrun() {
   343  	var (
   344  		body = io.NopCloser(s)
   345  		h    = &hdl{trname: s.trname}
   346  		it   = iterator{handler: h, body: body, hbuf: make([]byte, dfltMaxHdr)}
   347  	)
   348  	for {
   349  		hlen, flags, err := it.nextProtoHdr(s.String())
   350  		if err == io.EOF {
   351  			break
   352  		}
   353  		debug.AssertNoErr(err)
   354  		debug.Assert(flags&msgFl == 0)
   355  		obj, err := it.nextObj(s.String(), hlen)
   356  		if obj != nil {
   357  			cos.DrainReader(obj) // TODO: recycle `objReader` here
   358  			continue
   359  		}
   360  		if err != nil {
   361  			break
   362  		}
   363  	}
   364  }
   365  
   366  func (s *Stream) errCmpl(err error) {
   367  	if s.inSend() {
   368  		s.cmplCh <- cmpl{err, s.sendoff.obj}
   369  	}
   370  }
   371  
   372  // gc: drain terminated stream
   373  func (s *Stream) drain(err error) {
   374  	for {
   375  		select {
   376  		case obj := <-s.workCh:
   377  			s.doCmpl(obj, err)
   378  		default:
   379  			return
   380  		}
   381  	}
   382  }
   383  
   384  // gc:
   385  func (s *Stream) closeAndFree() {
   386  	close(s.workCh)
   387  	close(s.cmplCh)
   388  
   389  	g.mm.Free(s.maxhdr)
   390  	if s.pdu != nil {
   391  		s.pdu.free(g.mm)
   392  	}
   393  }
   394  
   395  // gc: post idle tick if idle
   396  func (s *Stream) idleTick() {
   397  	if len(s.workCh) == 0 && s.sessST.CAS(active, inactive) {
   398  		s.workCh <- &Obj{Hdr: ObjHdr{Opcode: opcIdleTick}}
   399  		if verbose {
   400  			nlog.Infof("%s: active => inactive", s)
   401  		}
   402  	}
   403  }
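
        // The idle-tick convention above: a tick is posted only when the send queue is
        // drained and the active => inactive transition wins (in the real code, a CAS
        // on the atomic session state). Sketch with a plain boolean:
        func sketchMaybeTick(workCh chan *Obj, active *bool) {
        	if len(workCh) == 0 && *active {
        		*active = false
        		workCh <- &Obj{Hdr: ObjHdr{Opcode: opcIdleTick}}
        	}
        }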
   404  
   405  ///////////
   406  // Stats //
   407  ///////////
   408  
   409  func (stats *Stats) CompressionRatio() float64 {
   410  	bytesRead := stats.Offset.Load()
   411  	bytesSent := stats.CompressedSize.Load()
   412  	return float64(bytesRead) / float64(bytesSent)
   413  }
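
        // Example: a stream that has read 100MiB of uncompressed data (Offset) and put
        // 40MiB of lz4 frames on the wire (CompressedSize) reports a ratio of 2.5:
        func sketchRatioExample() float64 {
        	const bytesRead, bytesSent = 100 << 20, 40 << 20
        	return float64(bytesRead) / float64(bytesSent) // 2.5
        }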
   414  
   415  ///////////////
   416  // lz4Stream //
   417  ///////////////
   418  
   419  func (lz4s *lz4Stream) Read(b []byte) (n int, err error) {
   420  	var (
   421  		sendoff = &lz4s.s.sendoff
   422  		last    = sendoff.obj.Hdr.isFin()
   423  		retry   = maxInReadRetries // insist on returning n > 0 (note that lz4 compresses /blocks/)
   424  	)
   425  	if lz4s.sgl.Len() > 0 {
   426  		lz4s.zw.Flush()
   427  		n, err = lz4s.sgl.Read(b)
   428  		if err == io.EOF { // reusing/rewinding this buf multiple times
   429  			err = nil
   430  		}
   431  		goto ex
   432  	}
   433  re:
   434  	n, err = lz4s.s.Read(b)
   435  	_, _ = lz4s.zw.Write(b[:n])
   436  	if last {
   437  		lz4s.zw.Flush()
   438  		retry = 0
   439  	} else if lz4s.s.sendoff.ins == inEOB || err != nil {
   440  		lz4s.zw.Flush()
   441  		retry = 0
   442  	}
   443  	n, _ = lz4s.sgl.Read(b)
   444  	if n == 0 {
   445  		if retry > 0 {
   446  			retry--
   447  			runtime.Gosched()
   448  			goto re
   449  		}
   450  		lz4s.zw.Flush()
   451  		n, _ = lz4s.sgl.Read(b)
   452  	}
   453  ex:
   454  	lz4s.s.stats.CompressedSize.Add(int64(n))
   455  	if lz4s.sgl.Len() == 0 {
   456  		lz4s.sgl.Reset()
   457  	}
   458  	if last && err == nil {
   459  		err = io.EOF
   460  	}
   461  	return
   462  }
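
        // lz4Stream.Read above insists on returning n > 0 because lz4 emits whole
        // compressed blocks: until a block boundary (or a Flush) nothing may show up
        // in the SGL even though input was consumed. The retry-with-yield pattern in
        // isolation (the `read` argument is a stand-in for draining the SGL):
        func sketchReadRetry(read func([]byte) int, b []byte, retries int) (n int) {
        	for {
        		if n = read(b); n > 0 || retries == 0 {
        			return
        		}
        		retries--
        		runtime.Gosched() // yield and let the compressor make progress
        	}
        }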