github.com/Jeffail/benthos/v3@v3.65.0/lib/buffer/single/mmap_cache.go (about)

     1  //go:build !wasm
     2  // +build !wasm
     3  
     4  package single
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"os"
    10  	"path"
    11  	"sync"
    12  
    13  	"github.com/Jeffail/benthos/v3/lib/log"
    14  	"github.com/Jeffail/benthos/v3/lib/metrics"
    15  	"github.com/Jeffail/benthos/v3/lib/util/disk"
    16  	mmap "github.com/edsrzf/mmap-go"
    17  )
    18  
    19  //------------------------------------------------------------------------------
    20  
    21  // MmapCacheConfig is config options for the MmapCache type.
    22  type MmapCacheConfig struct {
    23  	Path              string `json:"directory" yaml:"directory"`
    24  	FileSize          int    `json:"file_size" yaml:"file_size"`
    25  	RetryPeriod       string `json:"retry_period" yaml:"retry_period"`
    26  	CleanUp           bool   `json:"clean_up" yaml:"clean_up"`
    27  	ReservedDiskSpace uint64 `json:"reserved_disk_space" yaml:"reserved_disk_space"`
    28  }
    29  
    30  // NewMmapCacheConfig creates a new MmapCacheConfig oject with default values.
    31  func NewMmapCacheConfig() MmapCacheConfig {
    32  	return MmapCacheConfig{
    33  		Path:              "",
    34  		FileSize:          250 * 1024 * 1024, // 250MiB
    35  		RetryPeriod:       "1s",              // 1 second
    36  		CleanUp:           true,
    37  		ReservedDiskSpace: 100 * 1024 * 1024, // 50MiB
    38  	}
    39  }
    40  
    41  // CachedMmap is a struct containing a cached Mmap file and the file handler.
    42  type CachedMmap struct {
    43  	f *os.File
    44  	m mmap.MMap
    45  }
    46  
    47  // MmapCache keeps track of any Mmap files cached in memory and cleans up
    48  // resources as they are unclaimed. This type works similarly to sync.Cond,
    49  // where if you wish to use it you need to lock it.
    50  type MmapCache struct {
    51  	config MmapCacheConfig
    52  
    53  	logger log.Modular
    54  	stats  metrics.Type
    55  
    56  	tracker    CachedMmap
    57  	cache      map[int]CachedMmap
    58  	inProgress map[int]struct{}
    59  
    60  	*sync.Cond
    61  }
    62  
    63  // NewMmapCache creates a cache for managing open mmap files.
    64  func NewMmapCache(config MmapCacheConfig, log log.Modular, stats metrics.Type) (*MmapCache, error) {
    65  	f := &MmapCache{
    66  		config:     config,
    67  		logger:     log,
    68  		stats:      stats,
    69  		cache:      make(map[int]CachedMmap),
    70  		inProgress: make(map[int]struct{}),
    71  		Cond:       sync.NewCond(&sync.Mutex{}),
    72  	}
    73  
    74  	if err := f.openTracker(); err != nil {
    75  		return nil, err
    76  	}
    77  	return f, nil
    78  }
    79  
    80  //------------------------------------------------------------------------------
    81  
    82  var (
    83  	// ErrWrongTrackerLength means the length of a read tracker was not correct.
    84  	ErrWrongTrackerLength = errors.New("tracker was unexpected length")
    85  
    86  	// ErrNotEnoughSpace means the target disk lacked the space needed for a new
    87  	// file.
    88  	ErrNotEnoughSpace = errors.New("target disk is at capacity")
    89  )
    90  
    91  // openTracker opens a tracker file for recording reader and writer indexes.
    92  func (f *MmapCache) openTracker() error {
    93  	defer f.Broadcast()
    94  
    95  	var fileInfo os.FileInfo
    96  	var err error
    97  
    98  	// Attempt to create the directory tree, ignore errors.
    99  	_ = os.MkdirAll(f.config.Path, 0o755)
   100  
   101  	fPath := path.Join(f.config.Path, "tracker")
   102  
   103  	fileInfo, err = os.Stat(fPath)
   104  	// If the tracker file doesn't exist we make a blank one.
   105  	if os.IsNotExist(err) {
   106  		f.tracker.f, err = os.Create(fPath)
   107  		block := make([]byte, 16)
   108  		if err == nil {
   109  			_, err = f.tracker.f.Write(block)
   110  		}
   111  	} else if err == nil && fileInfo.Size() == 16 {
   112  		f.tracker.f, err = os.OpenFile(fPath, os.O_RDWR, 0o644)
   113  	} else if err == nil {
   114  		err = ErrWrongTrackerLength
   115  	}
   116  
   117  	// Create the memory mapping.
   118  	if err == nil {
   119  		f.tracker.m, err = mmap.MapRegion(f.tracker.f, 16, mmap.RDWR, 0, 0)
   120  	}
   121  	return err
   122  }
   123  
   124  //------------------------------------------------------------------------------
   125  
   126  // GetTracker returns the []byte from the tracker file memory mapping.
   127  func (f *MmapCache) GetTracker() []byte {
   128  	return f.tracker.m
   129  }
   130  
   131  // Get returns the []byte from a memory mapped file index.
   132  func (f *MmapCache) Get(index int) []byte {
   133  	if c, exists := f.cache[index]; exists {
   134  		return c.m
   135  	}
   136  	return []byte{}
   137  }
   138  
   139  // EnsureCached checks that a particular index is cached, and if not then read
   140  // the index, this call blocks until either the index is successfully cached or
   141  // an error occurs.
   142  func (f *MmapCache) EnsureCached(index int) error {
   143  	var cache CachedMmap
   144  	var err error
   145  
   146  	// If we are already in the process of caching this index wait until that
   147  	// attempt is finished.
   148  	if _, inProgress := f.inProgress[index]; inProgress {
   149  		for inProgress {
   150  			f.Wait()
   151  			_, inProgress = f.inProgress[index]
   152  		}
   153  	}
   154  
   155  	// If the index is cached then return nil.
   156  	if f.IsCached(index) {
   157  		return nil
   158  	}
   159  
   160  	// Place the index in our inProgress map to indicate we are caching it on
   161  	// this goroutine.
   162  	f.inProgress[index] = struct{}{}
   163  
   164  	// Unlock our mutex as we are about to perform blocking, thread safe
   165  	// operations.
   166  	f.L.Unlock()
   167  
   168  	// Prefix index files with "mmap_"
   169  	fPath := path.Join(f.config.Path, fmt.Sprintf("mmap_%v", index))
   170  
   171  	// Check if file already exists
   172  	_, err = os.Stat(fPath)
   173  	if os.IsNotExist(err) {
   174  		// If we lack the space needed (reserved space + file size) then return
   175  		// error
   176  		if uint64(f.config.FileSize)+f.config.ReservedDiskSpace >
   177  			disk.TotalRemaining(f.config.Path) {
   178  			err = ErrNotEnoughSpace
   179  		} else if cache.f, err = os.Create(fPath); err == nil { // If not then we create it with our configured file size
   180  			block := make([]byte, f.config.FileSize)
   181  			if _, err = cache.f.Write(block); err != nil {
   182  				os.Remove(fPath)
   183  			}
   184  		}
   185  	} else if err == nil {
   186  		cache.f, err = os.OpenFile(fPath, os.O_RDWR, 0o644)
   187  	}
   188  
   189  	// Lock our mutex again
   190  	f.L.Lock()
   191  
   192  	// Defer broadcast and deletion of inProgress flag.
   193  	defer func() {
   194  		delete(f.inProgress, index)
   195  		f.Broadcast()
   196  	}()
   197  
   198  	// Create the memory mapping.
   199  	if err == nil {
   200  		cache.m, err = mmap.Map(cache.f, mmap.RDWR, 0)
   201  		if err != nil {
   202  			cache.f.Close()
   203  			os.Remove(fPath)
   204  		} else {
   205  			f.cache[index] = cache
   206  		}
   207  	}
   208  	return err
   209  }
   210  
   211  // IsCached returns a bool indicating whether the current memory mapped file
   212  // index is cached.
   213  func (f *MmapCache) IsCached(index int) bool {
   214  	_, exists := f.cache[index]
   215  	return exists
   216  }
   217  
   218  // RemoveAll removes all indexes from the cache as well as the tracker.
   219  func (f *MmapCache) RemoveAll() error {
   220  	for _, c := range f.cache {
   221  		c.m.Flush()
   222  		c.m.Unmap()
   223  		c.f.Close()
   224  	}
   225  	f.cache = map[int]CachedMmap{}
   226  
   227  	f.tracker.m.Flush()
   228  	f.tracker.m.Unmap()
   229  	f.tracker.f.Close()
   230  
   231  	f.tracker = CachedMmap{}
   232  	return nil
   233  }
   234  
   235  // Remove removes the index from our cache, the file is NOT deleted.
   236  func (f *MmapCache) Remove(index int) error {
   237  	if c, ok := f.cache[index]; ok {
   238  		delete(f.cache, index)
   239  
   240  		// Now we are flushing the cache, this could block so we unlock
   241  		// temporarily.
   242  		f.L.Unlock()
   243  		defer f.L.Lock()
   244  
   245  		// TODO: What happens if we subsequently opened the same map file during
   246  		// this operation?
   247  		c.m.Flush()
   248  		c.m.Unmap()
   249  		c.f.Close()
   250  	}
   251  	return nil
   252  }
   253  
   254  // Delete deletes the file for an index.
   255  func (f *MmapCache) Delete(index int) error {
   256  	p := path.Join(f.config.Path, fmt.Sprintf("mmap_%v", index))
   257  
   258  	// This could be a blocking call, and there's no reason to keep the cache
   259  	// locked.
   260  	f.L.Unlock()
   261  	defer f.L.Lock()
   262  
   263  	return os.Remove(p)
   264  }
   265  
   266  //------------------------------------------------------------------------------