github.com/coyove/sdss@v0.0.0-20231129015646-c2ec58cca6a2/contrib/bitmap/bitmap_mgr.go (about)

     1  package bitmap
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"os"
     7  	"path/filepath"
     8  	"runtime"
     9  	"sort"
    10  	"strconv"
    11  	"strings"
    12  	"sync"
    13  	"time"
    14  
    15  	"github.com/coyove/sdss/contrib/clock"
    16  	"golang.org/x/sync/singleflight"
    17  )
    18  
    19  type Manager struct {
    20  	mu, reloadmu sync.Mutex
    21  	dirname      string
    22  	switchLimit  int64
    23  	dirFiles     []string
    24  	current      *SaveAggregator
    25  	loader       singleflight.Group
    26  	cache        *Cache
    27  
    28  	DirMaxFiles int
    29  
    30  	Event struct {
    31  		OnLoaded  func(string, time.Duration)
    32  		OnSaved   func(string, int, error, time.Duration)
    33  		OnMissing func(int64) (*Range, error)
    34  	}
    35  }
    36  
    37  func (m *Manager) getPath(base int64) string {
    38  	return filepath.Join(m.dirname, fmt.Sprintf("%016x", base))
    39  }
    40  
    41  func (m *Manager) saveAggImpl(b *Range) error {
    42  	start := time.Now()
    43  	fn := m.getPath(b.Start())
    44  	x, err := b.Save(fn, b.Len() >= m.switchLimit)
    45  	if err == nil {
    46  		if bs, ok := m.Last(); !ok || bs != b.Start() {
    47  			err = m.ReloadFiles()
    48  		}
    49  	}
    50  	if m.Event.OnSaved != nil {
    51  		m.Event.OnSaved(fn, x, err, time.Since(start))
    52  	}
    53  	return err
    54  }
    55  
    56  func (m *Manager) load(offset int64) (*Range, error) {
    57  	if offset == m.current.Range().Start() {
    58  		return m.current.Range(), nil
    59  	}
    60  	fn := m.getPath(offset)
    61  	cached := m.cache.Get(fn)
    62  	if cached != nil {
    63  		return cached, nil
    64  	}
    65  	out, err, _ := m.loader.Do(fn, func() (interface{}, error) {
    66  		start := time.Now()
    67  		v, err := Load(fn)
    68  		if v == nil && err == nil {
    69  			return nil, nil
    70  		}
    71  		if m.Event.OnLoaded != nil {
    72  			m.Event.OnLoaded(fn, time.Since(start))
    73  		}
    74  		return v, err
    75  	})
    76  	if err != nil {
    77  		return nil, err
    78  	}
    79  	if out == nil {
    80  		return nil, nil
    81  	}
    82  	m.cache.Add(fn, out.(*Range))
    83  	return out.(*Range), nil
    84  }
    85  
    86  func (m *Manager) findNext(mark int64) (int64, bool) {
    87  	marks := fmt.Sprintf("%016x", mark)
    88  	idx := sort.SearchStrings(m.dirFiles, marks)
    89  	if idx >= len(m.dirFiles) {
    90  		return 0, true
    91  	}
    92  	if m.dirFiles[idx] == marks {
    93  		idx++
    94  	}
    95  	prev, _ := strconv.ParseInt(m.dirFiles[idx], 16, 64)
    96  	return prev, false
    97  }
    98  
    99  func (m *Manager) findPrev(mark int64) (int64, bool) {
   100  	marks := fmt.Sprintf("%016x", mark)
   101  	idx := sort.SearchStrings(m.dirFiles, marks)
   102  	if idx >= len(m.dirFiles) {
   103  		idx = len(m.dirFiles)
   104  	}
   105  	if idx == 0 {
   106  		return 0, true
   107  	}
   108  	prev, _ := strconv.ParseInt(m.dirFiles[idx-1], 16, 64)
   109  	return prev, false
   110  }
   111  
   112  func (m *Manager) Last() (int64, bool) {
   113  	m.reloadmu.Lock()
   114  	defer m.reloadmu.Unlock()
   115  	v, empty := m.findPrev(clock.UnixMilli() + 1)
   116  	return v, !empty
   117  }
   118  
   119  func (m *Manager) ReloadFiles() error {
   120  	m.reloadmu.Lock()
   121  	defer m.reloadmu.Unlock()
   122  	df, err := os.Open(m.dirname)
   123  	if err != nil {
   124  		return err
   125  	}
   126  	defer df.Close()
   127  	names, err := df.Readdirnames(-1)
   128  	if err != nil {
   129  		return err
   130  	}
   131  
   132  	for i := len(names) - 1; i >= 0; i-- {
   133  		if strings.HasSuffix(names[i], ".mtfbak") {
   134  			names = append(names[:i], names[i+1:]...)
   135  		}
   136  	}
   137  
   138  	sort.Strings(names)
   139  	if m.DirMaxFiles > 0 {
   140  		for len(names) > m.DirMaxFiles {
   141  			os.Remove(filepath.Join(m.dirname, names[0]))
   142  			names = names[1:]
   143  		}
   144  	}
   145  
   146  	for _, n := range names {
   147  		if _, err := strconv.ParseInt(n, 16, 64); err != nil {
   148  			return fmt.Errorf("invalid filename %s/%s: %v", m.dirname, n, err)
   149  		}
   150  	}
   151  	m.dirFiles = names
   152  	return nil
   153  }
   154  
   155  func NewManager(dir string, switchLimit int64, cache *Cache) (*Manager, error) {
   156  	if err := os.MkdirAll(dir, 0777); err != nil {
   157  		return nil, err
   158  	}
   159  	if cache == nil {
   160  		cache = NewLRUCache(0)
   161  	}
   162  	m := &Manager{
   163  		dirname:     dir,
   164  		cache:       cache,
   165  		switchLimit: switchLimit,
   166  	}
   167  	if err := m.ReloadFiles(); err != nil {
   168  		return nil, err
   169  	}
   170  
   171  	normBase := clock.UnixMilli()
   172  	prevBase, isEmpty := m.findPrev(normBase + 1)
   173  	if isEmpty {
   174  		m.current = New(normBase).AggregateSaves(m.saveAggImpl)
   175  	} else {
   176  		b, err := Load(m.getPath(prevBase))
   177  		if err != nil {
   178  			return nil, err
   179  		}
   180  		m.current = b.AggregateSaves(m.saveAggImpl)
   181  	}
   182  	return m, nil
   183  }
   184  
   185  func (m *Manager) Saver() *SaveAggregator {
   186  	m.mu.Lock()
   187  	defer m.mu.Unlock()
   188  	if m.current.Range().Len() >= m.switchLimit {
   189  		m.current.Close()
   190  		m.current = New(clock.UnixMilli()).AggregateSaves(m.saveAggImpl)
   191  	}
   192  	return m.current
   193  }
   194  
   195  func (m *Manager) WalkAsc(start int64, f func(*Range) bool) (err error) {
   196  	for {
   197  		if start == 0 {
   198  			// Since we use unix milli as the filename, 0 can't be a legal one.
   199  			start = 1
   200  		}
   201  		next, isLast := m.findNext(start - 1)
   202  		if isLast {
   203  			return io.EOF
   204  		}
   205  		b, err := m.load(next)
   206  		if err != nil {
   207  			return err
   208  		}
   209  		if b != nil && !f(b) {
   210  			return nil
   211  		}
   212  		start = next + 1
   213  	}
   214  }
   215  
   216  func (m *Manager) WalkDesc(start int64, f func(*Range) bool) (err error) {
   217  	for {
   218  		var b *Range
   219  
   220  		prev, isFirst := m.findPrev(start + 1)
   221  		if isFirst {
   222  			if m.Event.OnMissing != nil {
   223  				b, err = m.Event.OnMissing(start + 1)
   224  				goto LOADED
   225  			}
   226  			return io.EOF
   227  		}
   228  		b, err = m.load(prev)
   229  
   230  	LOADED:
   231  		if err != nil {
   232  			return err
   233  		}
   234  		if b != nil && !f(b) {
   235  			return nil
   236  		}
   237  		start = prev - 1
   238  	}
   239  }
   240  
   241  func (m *Manager) MultiWalkDesc(start int64, f func(*Range) bool) (err error) {
   242  	w := runtime.NumCPU()
   243  	for {
   244  		var starts []int64
   245  		for i := 0; i < w; i++ {
   246  			prev, isFirst := m.findPrev(start + 1)
   247  			if isFirst {
   248  				break
   249  			}
   250  			starts = append(starts, prev)
   251  			start = prev - 1
   252  		}
   253  
   254  		if len(starts) == 0 {
   255  			return io.EOF
   256  		}
   257  
   258  		var exited bool
   259  		var outErr error
   260  		var wg sync.WaitGroup
   261  		wg.Add(len(starts))
   262  		for _, s := range starts {
   263  			go func(s int64) {
   264  				defer wg.Done()
   265  				b, err := m.load(s)
   266  				if err != nil {
   267  					exited, outErr = true, err
   268  					return
   269  				}
   270  				if b != nil && !f(b) {
   271  					exited = true
   272  				}
   273  			}(s)
   274  		}
   275  		wg.Wait()
   276  		if exited {
   277  			return outErr
   278  		}
   279  	}
   280  }
   281  
   282  func (m *Manager) String() string {
   283  	return fmt.Sprintf("files: %d, saver: %.1f, cache: %d(%db)",
   284  		len(m.dirFiles), m.current.Metrics(), m.cache.Len(), m.cache.curWeight)
   285  }
   286  
   287  func (m *Manager) CollectSimple(dedup interface{ Add(Key) bool }, vs Values, n int) (res []KeyIdScore, jms []JoinMetrics) {
   288  	m.WalkDesc(clock.UnixMilli(), func(b *Range) bool {
   289  		jm := b.Join(vs, -1, true, func(kis KeyIdScore) bool {
   290  			if dedup.Add(kis.Key) {
   291  				res = append(res, kis)
   292  			}
   293  			return len(res) < n
   294  		})
   295  		jms = append(jms, jm)
   296  		return len(res) < n
   297  	})
   298  	return
   299  }