github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/memsys/iosgl.go

// Package memsys provides memory management and slab/SGL allocation with io.Reader and io.Writer interfaces
// on top of scatter-gather lists of reusable buffers.
/*
 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
 */
package memsys

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"sync"

	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/atomic"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/cmn/debug"
	"github.com/NVIDIA/aistore/cmn/nlog"
)

// interface guard
var (
	_ io.ByteScanner = (*SGL)(nil)

	_ io.ReaderFrom = (*SGL)(nil)
	_ cos.WriterAt  = (*SGL)(nil)
	_ io.WriterTo   = (*SGL)(nil)
	_ cos.WriterTo2 = (*SGL)(nil) // simplified io.WriteTo to write entire (0 -:- woff) content

	_ cos.ReadOpenCloser = (*SGL)(nil)
	_ cos.ReadOpenCloser = (*Reader)(nil)
)

type (
	// implements io.ReadWriteCloser + Reset
	SGL struct {
		slab *Slab
		sgl  [][]byte
		woff int64
		roff int64
	}
	// uses the underlying SGL to implement io.ReadCloser + io.Seeker
	Reader struct {
		z    *SGL
		roff int64
	}
)
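
// A minimal write-once-read-many sketch, assuming an `mm *MMSA` instance
// and its `NewSGL` constructor (`src` and `dst` are hypothetical):
//
//	sgl := mm.NewSGL(0)
//	defer sgl.Free()
//	io.Copy(sgl, src) // fill via ReadFrom
//	io.Copy(dst, sgl) // drain via WriteTo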

/////////////
// sglPool //
/////////////

const numPools = 8

var (
	pgPools      [numPools]sync.Pool
	smPools      [numPools]sync.Pool
	sgl0         SGL
	pgIdx, smIdx atomic.Uint32
)

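// alloc and free rotate over numPools sync.Pool instances (free increments the
// shared index, alloc reads it) to spread Get/Put traffic and reduce contention
// on any single pool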
func _allocSGL(isPage bool) (z *SGL) {
	var pool *sync.Pool
	if isPage {
		idx := (pgIdx.Load() + 1) % numPools
		pool = &pgPools[idx]
	} else {
		idx := (smIdx.Load() + 1) % numPools
		pool = &smPools[idx]
	}
	if v := pool.Get(); v != nil {
		z = v.(*SGL)
	} else {
		z = &SGL{}
	}
	return
}

func _freeSGL(z *SGL, isPage bool) {
	var pool *sync.Pool
	if isPage {
		idx := pgIdx.Inc() % numPools
		pool = &pgPools[idx]
	} else {
		idx := smIdx.Inc() % numPools
		pool = &smPools[idx]
	}
	sgl := z.sgl[:0]
	*z = sgl0
	z.sgl = sgl
	pool.Put(z)
}

/////////
// SGL //
/////////

func (z *SGL) Cap() int64  { return int64(len(z.sgl)) * z.slab.Size() }
func (z *SGL) Size() int64 { return z.woff }
func (z *SGL) Roff() int64 { return z.roff }
func (z *SGL) Slab() *Slab { return z.slab }
func (z *SGL) IsNil() bool { return z == nil || z.slab == nil }

// grows on demand upon writing
func (z *SGL) grow(toSize int64) {
	z.slab.muget.Lock()
	for z.Cap() < toSize {
		z.sgl = append(z.sgl, z.slab._alloc())
	}
	z.slab.muget.Unlock()
}

// usage via io.Copy(z, source), whereby `z` reads from the `source` until EOF
// see also: WriteTo
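//
// e.g., a minimal sketch (`src` is a hypothetical io.Reader):
//
//	written, err := io.Copy(sgl, src)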
func (z *SGL) ReadFrom(r io.Reader) (n int64, _ error) {
	for {
		if c := z.Cap(); z.woff > c-128 {
			z.grow(c + max(z.slab.Size(), DefaultBufSize))
		}
		idx := z.woff / z.slab.Size()
		off := z.woff % z.slab.Size()
		buf := z.sgl[idx]

		written, err := r.Read(buf[off:])
		z.woff += int64(written)
		n += int64(written)
		if err != nil {
			if err == io.EOF {
				return n, nil
			}
			return n, err
		}
	}
}

// simplified for speed
// - disregards roff, usage is strictly limited to writing an _entire_ sgl
// - compare w/ WriteTo below
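//
// e.g., a minimal sketch (`dst` is a hypothetical io.Writer):
//
//	err := sgl.WriteTo2(dst) // writes the entire (0 -:- woff) content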
func (z *SGL) WriteTo2(dst io.Writer) error {
	rem := z.woff
	siz := z.slab.Size()
	for _, buf := range z.sgl {
		l := min(rem, siz)
		if l <= 0 {
			break
		}
		written, err := dst.Write(buf[:l])
		rem -= l
		if err != nil {
			if cmn.Rom.FastV(5, cos.SmoduleMemsys) {
				nlog.Errorln(err)
			}
			return err
		}
		debug.Assert(written == int(l), written, " vs ", l)
	}
	return nil
}

// compliant io.WriterTo interface impl-n (compare w/ WriteTo2)
// usage via io.Copy(dst, z), whereby `z` writes to the `dst` until EOF
// see also: ReadFrom
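//
// e.g., a minimal sketch (`dst` is a hypothetical io.Writer):
//
//	n, err := io.Copy(dst, sgl) // drains the un-read (roff -:- woff) range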
func (z *SGL) WriteTo(dst io.Writer) (n int64, _ error) {
	var (
		idx = int(z.roff / z.slab.Size())
		off = z.roff % z.slab.Size()
	)
	for {
		rem := z.Len()
		if rem <= 0 {
			break
		}
		buf := z.sgl[idx]
		siz := min(z.slab.Size()-off, rem)
		written, err := dst.Write(buf[off : off+siz])
		m := int64(written)
		n += m
		z.roff += m
		if m < siz && err == nil {
			err = io.ErrShortWrite
		}
		if err != nil {
			if cmn.Rom.FastV(5, cos.SmoduleMemsys) {
				nlog.Errorln(err)
			}
			return n, err
		}
		debug.Assert(m == siz, m, " vs ", siz) // (unlikely m > siz)
		idx++
		off = 0
	}
	return n, nil
}

func (z *SGL) Write(p []byte) (n int, err error) {
	wlen := len(p)
	if needtot := z.woff + int64(wlen); needtot > z.Cap() {
		z.grow(needtot)
	}
	idx, off, poff := z.woff/z.slab.Size(), z.woff%z.slab.Size(), 0
	for wlen > 0 {
		size := min(z.slab.Size()-off, int64(wlen))
		buf := z.sgl[idx]
		src := p[poff : poff+int(size)]
		copy(buf[off:], src)
		z.woff += size
		idx++
		off = 0
		wlen -= int(size)
		poff += int(size)
	}
	return len(p), nil
}

func (z *SGL) WriteByte(c byte) error {
	if needtot := z.woff + 1; needtot > z.Cap() {
		z.grow(needtot)
	}
	idx, off := z.woff/z.slab.Size(), z.woff%z.slab.Size()
	buf := z.sgl[idx]
	buf[off] = c
	z.woff++
	return nil
}

func (z *SGL) Read(b []byte) (n int, err error) {
	n, z.roff, err = z._readAt(b, z.roff)
	return
}

func (z *SGL) ReadByte() (byte, error) {
	var (
		b           [1]byte
		_, off, err = z._readAt(b[:], z.roff)
	)
	z.roff = off
	return b[0], err
}

func (z *SGL) UnreadByte() error {
	if z.roff == 0 {
		return errors.New("memsys: cannot unread-byte at zero offset")
	}
	z.roff--
	return nil
}

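// _readAt copies into `b` starting at the given read offset, crossing
// slab-buffer boundaries as needed; returns the number of bytes copied and the
// advanced offset (with io.EOF when fewer than len(b) bytes remain)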
func (z *SGL) _readAt(b []byte, roffin int64) (n int, roff int64, err error) {
	roff = roffin
	if roff >= z.woff {
		return 0, roff, io.EOF
	}
	var (
		idx, off = int(roff / z.slab.Size()), roff % z.slab.Size()
		buf      = z.sgl[idx]
		size     = min(int64(len(b)), z.woff-roff)
	)
	n = copy(b[:size], buf[off:])
	roff += int64(n)
	for n < len(b) && idx < len(z.sgl)-1 {
		idx++
		buf = z.sgl[idx]
		size = min(int64(len(b)-n), z.woff-roff)
		n1 := copy(b[n:n+int(size)], buf)
		roff += int64(n1)
		n += n1
	}
	if n < len(b) {
		err = io.EOF
	}
	return n, roff, err
}

// ReadAll is a strictly _convenience_ method as it performs heap allocation.
// Still, it's an optimized alternative to the generic io.ReadAll which
// normally returns err == nil (and not io.EOF) upon successful reading until EOF.
// ReadAll always returns err == nil.
func (z *SGL) ReadAll() (b []byte) {
	b = make([]byte, z.Size())
	for off, i := 0, 0; i < len(z.sgl); i++ {
		n := copy(b[off:], z.sgl[i])
		off += n
	}
	return
}

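// ErrDumpAt logs (for debugging) up to 128 bytes at the current read offset,
// and the beginning of the next buffer, if any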
func (z *SGL) ErrDumpAt() {
	if z.roff >= z.woff {
		return
	}
	var (
		idx, off = int(z.roff / z.slab.Size()), int(z.roff % z.slab.Size())
		buf      = z.sgl[idx]
		part     = buf[off:]
	)
	s := fmt.Sprintf("at %2d: %s", idx, cos.BHead(part, 128))
	nlog.ErrorDepth(1, s)
	if idx < len(z.sgl)-1 {
		buf = z.sgl[idx+1]
		part = buf[0:]
		s = fmt.Sprintf("at %2d: %s", idx+1, cos.BHead(part, 128))
		nlog.ErrorDepth(1, s)
	}
}

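// NextLine returns the next '\n'-terminated line (without the newline itself),
// handling the case of a line spliced across two adjacent buffers; `lin`, when
// non-nil, is reused if it has sufficient capacity; with `advanceRoff` set the
// read offset advances past the line.
//
// e.g., a minimal line-by-line iteration sketch:
//
//	for {
//		line, err := sgl.NextLine(nil, true)
//		if err != nil {
//			break // io.EOF upon reaching the end
//		}
//		// process `line`
//	}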
func (z *SGL) NextLine(lin []byte, advanceRoff bool) (lout []byte, err error) {
	if z.roff >= z.woff {
		return nil, io.EOF
	}

	var (
		l        int
		part     []byte
		idx, off = int(z.roff / z.slab.Size()), int(z.roff % z.slab.Size())
		buf      = z.sgl[idx]
		i        = bytes.IndexRune(buf[off:], '\n')
	)
	if i < 0 {
		part = buf[off:]
		l = len(part)
		// when line's spliced across two bufs
		if idx >= len(z.sgl)-1 {
			return nil, errors.New("sgl last buf with a partial line: " + cos.BHead(part, 128))
		}
		buf = z.sgl[idx+1]
		off = 0
		i = bytes.IndexRune(buf, '\n')
		if i < 0 {
			return nil, fmt.Errorf("missing eol: %q, %q", cos.BHead(part, 128), cos.BHead(buf))
		}
	}
	if cap(lin) < i+l {
		debug.Assert(lin == nil, "check initial line buf cap: ", cap(lin), " vs ", i+l)
		lout = make([]byte, i+l)
	} else {
		lout = lin[:i+l]
	}

	// copy the `part`
	copy(lout, part)
	copy(lout[l:], buf[off:off+i])

	if advanceRoff {
		// i.e., read it
		z.roff += int64(i + l + 1)
	}
	return lout, nil
}

// NOTE assert and use with caution.
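// WriteAt overwrites a previously written region (asserting that
// [off, off+len(p)) falls within the current size) and restores woff when done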
func (z *SGL) WriteAt(p []byte, off int64) (n int, err error) {
	debug.Assert(z.woff >= off+int64(len(p)))

	prevWriteOff := z.woff
	z.woff = off
	n, err = z.Write(p)
	z.woff = prevWriteOff
	return n, err
}

// reuse already allocated SGL (compare with Reader below)
func (z *SGL) Reset()  { z.woff, z.roff = 0, 0 }
func (z *SGL) Rewind() { z.roff = 0 }

func (z *SGL) Len() int64                        { return z.woff - z.roff }
func (z *SGL) Open() (cos.ReadOpenCloser, error) { return NewReader(z), nil }

func (*SGL) Close() error { return nil } // NOTE: no-op

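// Free returns all underlying buffers to the owning slab and recycles the SGL
// itself for reuse (callers commonly `defer sgl.Free()` right after allocation)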
func (z *SGL) Free() {
	debug.Assert(z.slab != nil)
	s := z.slab
	s.muput.Lock()
	for _, buf := range z.sgl {
		size := cap(buf)
		debug.Assert(int64(size) == s.Size())
		b := buf[:size] // always freeing original (fixed buffer) size
		deadbeef(b)
		s.put = append(s.put, b)
	}
	s.muput.Unlock()
	_freeSGL(z, z.slab.m.isPage())
}

// NOTE assert and use with caution: heap allocation (via ReadAll)
// is intended for tests (and only tests)
func (z *SGL) Bytes() (b []byte) {
	cos.Assert(z.roff == 0)
	if z.woff >= z.slab.Size() {
		b = z.ReadAll()
		return
	}
	return z.sgl[0][:z.woff]
}

////////////
// Reader //
////////////

// Reader implements (io.ReadCloser + io.Seeker) on top of an existing SGL.
// In the most common write-once-read-many usage scenario, SGL can be simultaneously
// read via multiple concurrent Readers.
//
// See related sgl methods: `Reset` and `Open`
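//
// e.g., a minimal sketch (`dst1`, `dst2` are hypothetical io.Writers):
//
//	r1, _ := sgl.Open()
//	r2, _ := sgl.Open()
//	go io.Copy(dst1, r1)
//	go io.Copy(dst2, r2)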

func NewReader(z *SGL) *Reader                      { return &Reader{z, 0} }
func (r *Reader) Open() (cos.ReadOpenCloser, error) { return NewReader(r.z), nil }
func (*Reader) Close() error                        { return nil }

func (r *Reader) Read(b []byte) (n int, err error) {
	n, r.roff, err = r.z._readAt(b, r.roff)
	return n, err
}

func (r *Reader) Seek(from int64, whence int) (offset int64, err error) {
	switch whence {
	case io.SeekStart:
		offset = from
	case io.SeekCurrent:
		offset = r.roff + from
	case io.SeekEnd:
		offset = r.z.woff + from
	default:
		return 0, errors.New("memsys: invalid whence")
	}
	if offset < 0 {
		return 0, errors.New("memsys: negative position")
	}
	r.roff = offset
	return
}