/*
 * Copyright 2020 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package z

import (
	"encoding/binary"
	"fmt"
	"log"
	"os"
	"sort"
	"sync/atomic"

	"github.com/pkg/errors"
)

const (
	// defaultCapacity is the minimum backing capacity any Buffer is created with.
	defaultCapacity = 64
	// defaultTag is the jemalloc stats tag used when the caller supplies none.
	defaultTag = "buffer"
)

// Buffer is equivalent of bytes.Buffer without the ability to read. It is NOT thread-safe.
//
// In UseCalloc mode, z.Calloc is used to allocate memory, which depending upon how the code is
// compiled could use jemalloc for allocations.
//
// In UseMmap mode, Buffer uses file mmap to allocate memory. This allows us to store big data
// structures without using physical memory.
//
// MaxSize can be set to limit the memory usage.
44 type Buffer struct { 45 padding uint64 // number of starting bytes used for padding 46 offset uint64 // used length of the buffer 47 buf []byte // backing slice for the buffer 48 bufType BufferType // type of the underlying buffer 49 curSz int // capacity of the buffer 50 maxSz int // causes a panic if the buffer grows beyond this size 51 mmapFile *MmapFile // optional mmap backing for the buffer 52 autoMmapAfter int // Calloc falls back to an mmaped tmpfile after crossing this size 53 autoMmapDir string // directory for autoMmap to create a tempfile in 54 persistent bool // when enabled, Release will not delete the underlying mmap file 55 tag string // used for jemalloc stats 56 } 57 58 func NewBuffer(capacity int, tag string) *Buffer { 59 if capacity < defaultCapacity { 60 capacity = defaultCapacity 61 } 62 if tag == "" { 63 tag = defaultTag 64 } 65 return &Buffer{ 66 buf: Calloc(capacity, tag), 67 bufType: UseCalloc, 68 curSz: capacity, 69 offset: 8, 70 padding: 8, 71 tag: tag, 72 } 73 } 74 75 // It is the caller's responsibility to set offset after this, because Buffer 76 // doesn't remember what it was. 
77 func NewBufferPersistent(path string, capacity int) (*Buffer, error) { 78 file, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0666) 79 if err != nil { 80 return nil, err 81 } 82 buffer, err := newBufferFile(file, capacity) 83 if err != nil { 84 return nil, err 85 } 86 buffer.persistent = true 87 return buffer, nil 88 } 89 90 func NewBufferTmp(dir string, capacity int) (*Buffer, error) { 91 if dir == "" { 92 dir = tmpDir 93 } 94 file, err := os.CreateTemp(dir, "buffer") 95 if err != nil { 96 return nil, err 97 } 98 return newBufferFile(file, capacity) 99 } 100 101 func newBufferFile(file *os.File, capacity int) (*Buffer, error) { 102 if capacity < defaultCapacity { 103 capacity = defaultCapacity 104 } 105 mmapFile, err := OpenMmapFileUsing(file, capacity, true) 106 if err != nil && err != NewFile { 107 return nil, err 108 } 109 buf := &Buffer{ 110 buf: mmapFile.Data, 111 bufType: UseMmap, 112 curSz: len(mmapFile.Data), 113 mmapFile: mmapFile, 114 offset: 8, 115 padding: 8, 116 } 117 return buf, nil 118 } 119 120 func NewBufferSlice(slice []byte) *Buffer { 121 return &Buffer{ 122 offset: uint64(len(slice)), 123 buf: slice, 124 bufType: UseInvalid, 125 } 126 } 127 128 func (b *Buffer) WithAutoMmap(threshold int, path string) *Buffer { 129 if b.bufType != UseCalloc { 130 panic("can only autoMmap with UseCalloc") 131 } 132 b.autoMmapAfter = threshold 133 if path == "" { 134 b.autoMmapDir = tmpDir 135 } else { 136 b.autoMmapDir = path 137 } 138 return b 139 } 140 141 func (b *Buffer) WithMaxSize(size int) *Buffer { 142 b.maxSz = size 143 return b 144 } 145 146 func (b *Buffer) IsEmpty() bool { 147 return int(b.offset) == b.StartOffset() 148 } 149 150 // LenWithPadding would return the number of bytes written to the buffer so far 151 // plus the padding at the start of the buffer. 
func (b *Buffer) LenWithPadding() int {
	return int(atomic.LoadUint64(&b.offset))
}

// LenNoPadding would return the number of bytes written to the buffer so far
// (without the padding).
func (b *Buffer) LenNoPadding() int {
	return int(atomic.LoadUint64(&b.offset) - b.padding)
}

// Bytes would return all the written bytes as a slice.
// The returned slice aliases the internal buffer; it is invalidated by Grow.
func (b *Buffer) Bytes() []byte {
	off := atomic.LoadUint64(&b.offset)
	return b.buf[b.padding:off]
}

// Grow would grow the buffer to have at least n more bytes. In case the buffer is at capacity, it
// would reallocate twice the size of current capacity + n, to ensure n bytes can be written to the
// buffer without further allocation. In UseMmap mode, this might result in underlying file
// expansion.
//
// Panics if the buffer is uninitialized, if maxSz would be exceeded, or for
// UseInvalid (slice-backed) buffers, which cannot grow.
func (b *Buffer) Grow(n int) {
	if b.buf == nil {
		panic("z.Buffer needs to be initialized before using")
	}
	// Enforce the optional hard cap before doing any work.
	if b.maxSz > 0 && int(b.offset)+n > b.maxSz {
		err := fmt.Errorf(
			"z.Buffer max size exceeded: %d offset: %d grow: %d", b.maxSz, b.offset, n)
		panic(err)
	}
	// Fast path: the current capacity already accommodates n more bytes.
	if int(b.offset)+n < b.curSz {
		return
	}

	// Calculate new capacity: roughly double (current capacity + n).
	growBy := b.curSz + n
	// Don't allocate more than 1GB at a time.
	if growBy > 1<<30 {
		growBy = 1 << 30
	}
	// Allocate at least n, even if it exceeds the 1GB limit above.
	if n > growBy {
		growBy = n
	}
	b.curSz += growBy

	switch b.bufType {
	case UseCalloc:
		// If autoMmap gets triggered, copy the slice over to an mmaped file.
		if b.autoMmapAfter > 0 && b.curSz > b.autoMmapAfter {
			b.bufType = UseMmap
			file, err := os.CreateTemp(b.autoMmapDir, "")
			if err != nil {
				panic(err)
			}
			mmapFile, err := OpenMmapFileUsing(file, b.curSz, true)
			// NewFile is expected here: the tempfile is freshly created.
			if err != nil && err != NewFile {
				panic(err)
			}
			// Copy the already-written prefix, then free the Calloc slice.
			assert(int(b.offset) == copy(mmapFile.Data, b.buf[:b.offset]))
			Free(b.buf)
			b.mmapFile = mmapFile
			b.buf = mmapFile.Data
			break
		}

		// Else, reallocate the slice.
		newBuf := Calloc(b.curSz, b.tag)
		assert(int(b.offset) == copy(newBuf, b.buf[:b.offset]))
		Free(b.buf)
		b.buf = newBuf

	case UseMmap:
		// Truncate and remap the underlying file.
		if err := b.mmapFile.Truncate(int64(b.curSz)); err != nil {
			err = errors.Wrapf(err,
				"while trying to truncate file: %s to size: %d", b.mmapFile.Fd.Name(), b.curSz)
			panic(err)
		}
		b.buf = b.mmapFile.Data

	default:
		panic("can only use Grow on UseCalloc and UseMmap buffers")
	}
}

// Allocate is a way to get a slice of size n back from the buffer. This slice can be directly
// written to. Warning: Allocate is not thread-safe. The byte slice returned MUST be used before
// further calls to Buffer.
func (b *Buffer) Allocate(n int) []byte {
	b.Grow(n)
	off := b.offset
	b.offset += uint64(n)
	return b.buf[off:int(b.offset)]
}

// AllocateOffset works the same way as allocate, but instead of returning a byte slice, it returns
// the offset of the allocation.
func (b *Buffer) AllocateOffset(n int) int {
	b.Grow(n)
	b.offset += uint64(n)
	return int(b.offset) - n
}

// writeLen appends sz as an 8-byte big-endian length header. The caller must
// have already reserved room via Grow.
func (b *Buffer) writeLen(sz int) {
	buf := b.Allocate(8)
	binary.BigEndian.PutUint64(buf, uint64(sz))
}

// SliceAllocate would encode the size provided into the buffer, followed by a call to Allocate,
// hence returning the slice of size sz.
// This can be used to allocate a lot of small buffers into
// this big buffer.
// Note that SliceAllocate should NOT be mixed with normal calls to Write.
func (b *Buffer) SliceAllocate(sz int) []byte {
	// Reserve room for the 8-byte length header plus the payload in one Grow.
	b.Grow(8 + sz)
	b.writeLen(sz)
	return b.Allocate(sz)
}

// StartOffset returns the offset of the first user byte (i.e. the padding size).
func (b *Buffer) StartOffset() int {
	return int(b.padding)
}

// WriteSlice writes slice as a length-prefixed record.
func (b *Buffer) WriteSlice(slice []byte) {
	dst := b.SliceAllocate(len(slice))
	assert(len(slice) == copy(dst, slice))
}

// SliceIterate calls f on every length-prefixed slice in the buffer, in write
// order, stopping at the first error. Zero-length slices are skipped.
func (b *Buffer) SliceIterate(f func(slice []byte) error) error {
	if b.IsEmpty() {
		return nil
	}

	next := b.StartOffset()
	var slice []byte
	for next >= 0 {
		slice, next = b.Slice(next)
		if len(slice) == 0 {
			continue
		}
		if err := f(slice); err != nil {
			return err
		}
	}

	return nil
}

const (
	UseCalloc BufferType = iota
	UseMmap
	UseInvalid
)

// BufferType identifies how a Buffer's backing memory was allocated.
type BufferType int

func (t BufferType) String() string {
	switch t {
	case UseCalloc:
		return "UseCalloc"
	case UseMmap:
		return "UseMmap"
	default:
		return "UseInvalid"
	}
}

// LessFunc compares two length-prefixed payloads (headers already stripped).
type LessFunc func(a, b []byte) bool

// sortHelper holds the state for the in-place merge sort performed by
// SortSliceBetween.
type sortHelper struct {
	offsets []int    // block boundaries (buffer offsets), one per ~1024 records
	b       *Buffer  // the buffer being sorted
	tmp     *Buffer  // scratch space for sortSmall and merge
	less    LessFunc // comparison over record payloads
	small   []int    // reusable scratch of record offsets within one block
}

// sortSmall sorts the records in b.buf[start:end] in place. It collects the
// record offsets, sorts the offsets, then rewrites the region via tmp.
func (s *sortHelper) sortSmall(start, end int) {
	s.tmp.Reset()
	s.small = s.small[:0]
	next := start
	for next >= 0 && next < end {
		s.small = append(s.small, next)
		_, next = s.b.Slice(next)
	}

	// We are sorting the slices pointed to by s.small offsets, but only moving the offsets around.
	sort.Slice(s.small, func(i, j int) bool {
		left, _ := s.b.Slice(s.small[i])
		right, _ := s.b.Slice(s.small[j])
		return s.less(left, right)
	})
	// Now we iterate over the s.small offsets and copy over the slices. The result is now in order.
	for _, off := range s.small {
		_, _ = s.tmp.Write(rawSlice(s.b.buf[off:]))
	}
	assert(end-start == copy(s.b.buf[start:end], s.tmp.Bytes()))
}

// assert aborts the process if b is false. Used for internal invariants only.
func assert(b bool) {
	if !b {
		log.Fatalf("%+v", errors.Errorf("Assertion failure"))
	}
}
func check(err error) {
	if err != nil {
		log.Fatalf("%+v", err)
	}
}
func check2(_ interface{}, err error) {
	check(err)
}

// merge combines two already-sorted adjacent runs (left, right) back into
// b.buf[start:end]. left is first copied into tmp because the merge overwrites
// the region it came from.
func (s *sortHelper) merge(left, right []byte, start, end int) {
	if len(left) == 0 || len(right) == 0 {
		return
	}
	s.tmp.Reset()
	check2(s.tmp.Write(left))
	left = s.tmp.Bytes()

	var ls, rs []byte

	copyLeft := func() {
		assert(len(ls) == copy(s.b.buf[start:], ls))
		left = left[len(ls):]
		start += len(ls)
	}
	copyRight := func() {
		assert(len(rs) == copy(s.b.buf[start:], rs))
		right = right[len(rs):]
		start += len(rs)
	}

	for start < end {
		// Once either side is exhausted, the remainder of the other side is
		// already in sorted order and can be copied wholesale.
		if len(left) == 0 {
			assert(len(right) == copy(s.b.buf[start:end], right))
			return
		}
		if len(right) == 0 {
			assert(len(left) == copy(s.b.buf[start:end], left))
			return
		}
		ls = rawSlice(left)
		rs = rawSlice(right)

		// We skip the first 8 bytes in the rawSlice, because that stores the length.
		if s.less(ls[8:], rs[8:]) {
			copyLeft()
		} else {
			copyRight()
		}
	}
}

// sort recursively merge-sorts the blocks offsets[lo..hi] and returns the
// sorted region of the buffer.
func (s *sortHelper) sort(lo, hi int) []byte {
	assert(lo <= hi)

	mid := lo + (hi-lo)/2
	loff, hoff := s.offsets[lo], s.offsets[hi]
	if lo == mid {
		// Single block: already sorted by sortSmall, just return the buffer.
		return s.b.buf[loff:hoff]
	}

	// lo, mid would sort from [offset[lo], offset[mid]) .
	left := s.sort(lo, mid)
	// Typically we'd use mid+1, but here mid represents an offset in the buffer. Each offset
	// covers a block of up to 1024 entries. So, if we do mid+1, we'd skip over those entries.
	right := s.sort(mid, hi)

	s.merge(left, right, loff, hoff)
	return s.b.buf[loff:hoff]
}

// SortSlice is like SortSliceBetween but sorting over the entire buffer.
func (b *Buffer) SortSlice(less func(left, right []byte) bool) {
	b.SortSliceBetween(b.StartOffset(), int(b.offset), less)
}

// SortSliceBetween sorts the length-prefixed records in [start, end) in place
// using less. Records are first sorted in blocks of 1024 (sortSmall), then the
// blocks are merged bottom-up (sort/merge) using a temporary buffer.
func (b *Buffer) SortSliceBetween(start, end int, less LessFunc) {
	if start >= end {
		return
	}
	// start == 0 would point at the padding, never at a valid record.
	if start == 0 {
		panic("start can never be zero")
	}

	// Record a block boundary every 1024 records, plus the final end offset.
	var offsets []int
	next, count := start, 0
	for next >= 0 && next < end {
		if count%1024 == 0 {
			offsets = append(offsets, next)
		}
		_, next = b.Slice(next)
		count++
	}
	assert(len(offsets) > 0)
	if offsets[len(offsets)-1] != end {
		offsets = append(offsets, end)
	}

	// Scratch needs to hold the larger half of a merge: ~half the range + 10%.
	szTmp := int(float64((end-start)/2) * 1.1)
	s := &sortHelper{
		offsets: offsets,
		b:       b,
		less:    less,
		small:   make([]int, 0, 1024),
		tmp:     NewBuffer(szTmp, b.tag),
	}
	defer func() { _ = s.tmp.Release() }()

	left := offsets[0]
	for _, off := range offsets[1:] {
		s.sortSmall(left, off)
		left = off
	}
	s.sort(0, len(offsets)-1)
}

// rawSlice returns the full record at the start of buf, including its 8-byte
// length header.
func rawSlice(buf []byte) []byte {
	sz := binary.BigEndian.Uint64(buf)
	return buf[:8+int(sz)]
}

// Slice would return the slice written at offset.
// It also returns the offset of the next record, or -1 when offset was the last.
func (b *Buffer) Slice(offset int) ([]byte, int) {
	if offset >= int(b.offset) {
		return nil, -1
	}

	sz := binary.BigEndian.Uint64(b.buf[offset:])
	start := offset + 8
	next := start + int(sz)
	res := b.buf[start:next]
	if next >= int(b.offset) {
		next = -1
	}
	return res, next
}

// SliceOffsets is an expensive function. Use sparingly.
491 func (b *Buffer) SliceOffsets() []int { 492 next := b.StartOffset() 493 var offsets []int 494 for next >= 0 { 495 offsets = append(offsets, next) 496 _, next = b.Slice(next) 497 } 498 return offsets 499 } 500 501 func (b *Buffer) Data(offset int) []byte { 502 if offset > b.curSz { 503 panic("offset beyond current size") 504 } 505 return b.buf[offset:b.curSz] 506 } 507 508 // Write would write p bytes to the buffer. 509 func (b *Buffer) Write(p []byte) (n int, err error) { 510 n = len(p) 511 b.Grow(n) 512 assert(n == copy(b.buf[b.offset:], p)) 513 b.offset += uint64(n) 514 return n, nil 515 } 516 517 // Reset would reset the buffer to be reused. 518 func (b *Buffer) Reset() { 519 b.offset = uint64(b.StartOffset()) 520 } 521 522 // Release would free up the memory allocated by the buffer. Once the usage of buffer is done, it is 523 // important to call Release, otherwise a memory leak can happen. 524 func (b *Buffer) Release() error { 525 if b == nil { 526 return nil 527 } 528 switch b.bufType { 529 case UseCalloc: 530 Free(b.buf) 531 case UseMmap: 532 if b.mmapFile == nil { 533 return nil 534 } 535 path := b.mmapFile.Fd.Name() 536 if err := b.mmapFile.Close(-1); err != nil { 537 return errors.Wrapf(err, "while closing file: %s", path) 538 } 539 if !b.persistent { 540 if err := os.Remove(path); err != nil { 541 return errors.Wrapf(err, "while deleting file %s", path) 542 } 543 } 544 } 545 return nil 546 }