github.com/coyove/sdss@v0.0.0-20231129015646-c2ec58cca6a2/contrib/bitmap/bitmap_mgr.go (about) 1 package bitmap 2 3 import ( 4 "fmt" 5 "io" 6 "os" 7 "path/filepath" 8 "runtime" 9 "sort" 10 "strconv" 11 "strings" 12 "sync" 13 "time" 14 15 "github.com/coyove/sdss/contrib/clock" 16 "golang.org/x/sync/singleflight" 17 ) 18 19 type Manager struct { 20 mu, reloadmu sync.Mutex 21 dirname string 22 switchLimit int64 23 dirFiles []string 24 current *SaveAggregator 25 loader singleflight.Group 26 cache *Cache 27 28 DirMaxFiles int 29 30 Event struct { 31 OnLoaded func(string, time.Duration) 32 OnSaved func(string, int, error, time.Duration) 33 OnMissing func(int64) (*Range, error) 34 } 35 } 36 37 func (m *Manager) getPath(base int64) string { 38 return filepath.Join(m.dirname, fmt.Sprintf("%016x", base)) 39 } 40 41 func (m *Manager) saveAggImpl(b *Range) error { 42 start := time.Now() 43 fn := m.getPath(b.Start()) 44 x, err := b.Save(fn, b.Len() >= m.switchLimit) 45 if err == nil { 46 if bs, ok := m.Last(); !ok || bs != b.Start() { 47 err = m.ReloadFiles() 48 } 49 } 50 if m.Event.OnSaved != nil { 51 m.Event.OnSaved(fn, x, err, time.Since(start)) 52 } 53 return err 54 } 55 56 func (m *Manager) load(offset int64) (*Range, error) { 57 if offset == m.current.Range().Start() { 58 return m.current.Range(), nil 59 } 60 fn := m.getPath(offset) 61 cached := m.cache.Get(fn) 62 if cached != nil { 63 return cached, nil 64 } 65 out, err, _ := m.loader.Do(fn, func() (interface{}, error) { 66 start := time.Now() 67 v, err := Load(fn) 68 if v == nil && err == nil { 69 return nil, nil 70 } 71 if m.Event.OnLoaded != nil { 72 m.Event.OnLoaded(fn, time.Since(start)) 73 } 74 return v, err 75 }) 76 if err != nil { 77 return nil, err 78 } 79 if out == nil { 80 return nil, nil 81 } 82 m.cache.Add(fn, out.(*Range)) 83 return out.(*Range), nil 84 } 85 86 func (m *Manager) findNext(mark int64) (int64, bool) { 87 marks := fmt.Sprintf("%016x", mark) 88 idx := sort.SearchStrings(m.dirFiles, marks) 89 if idx >= len(m.dirFiles) { 90 return 0, true 91 } 92 if m.dirFiles[idx] == marks { 93 idx++ 94 } 95 prev, _ := strconv.ParseInt(m.dirFiles[idx], 16, 64) 96 return prev, false 97 } 98 99 func (m *Manager) findPrev(mark int64) (int64, bool) { 100 marks := fmt.Sprintf("%016x", mark) 101 idx := sort.SearchStrings(m.dirFiles, marks) 102 if idx >= len(m.dirFiles) { 103 idx = len(m.dirFiles) 104 } 105 if idx == 0 { 106 return 0, true 107 } 108 prev, _ := strconv.ParseInt(m.dirFiles[idx-1], 16, 64) 109 return prev, false 110 } 111 112 func (m *Manager) Last() (int64, bool) { 113 m.reloadmu.Lock() 114 defer m.reloadmu.Unlock() 115 v, empty := m.findPrev(clock.UnixMilli() + 1) 116 return v, !empty 117 } 118 119 func (m *Manager) ReloadFiles() error { 120 m.reloadmu.Lock() 121 defer m.reloadmu.Unlock() 122 df, err := os.Open(m.dirname) 123 if err != nil { 124 return err 125 } 126 defer df.Close() 127 names, err := df.Readdirnames(-1) 128 if err != nil { 129 return err 130 } 131 132 for i := len(names) - 1; i >= 0; i-- { 133 if strings.HasSuffix(names[i], ".mtfbak") { 134 names = append(names[:i], names[i+1:]...) 135 } 136 } 137 138 sort.Strings(names) 139 if m.DirMaxFiles > 0 { 140 for len(names) > m.DirMaxFiles { 141 os.Remove(filepath.Join(m.dirname, names[0])) 142 names = names[1:] 143 } 144 } 145 146 for _, n := range names { 147 if _, err := strconv.ParseInt(n, 16, 64); err != nil { 148 return fmt.Errorf("invalid filename %s/%s: %v", m.dirname, n, err) 149 } 150 } 151 m.dirFiles = names 152 return nil 153 } 154 155 func NewManager(dir string, switchLimit int64, cache *Cache) (*Manager, error) { 156 if err := os.MkdirAll(dir, 0777); err != nil { 157 return nil, err 158 } 159 if cache == nil { 160 cache = NewLRUCache(0) 161 } 162 m := &Manager{ 163 dirname: dir, 164 cache: cache, 165 switchLimit: switchLimit, 166 } 167 if err := m.ReloadFiles(); err != nil { 168 return nil, err 169 } 170 171 normBase := clock.UnixMilli() 172 prevBase, isEmpty := m.findPrev(normBase + 1) 173 if isEmpty { 174 m.current = New(normBase).AggregateSaves(m.saveAggImpl) 175 } else { 176 b, err := Load(m.getPath(prevBase)) 177 if err != nil { 178 return nil, err 179 } 180 m.current = b.AggregateSaves(m.saveAggImpl) 181 } 182 return m, nil 183 } 184 185 func (m *Manager) Saver() *SaveAggregator { 186 m.mu.Lock() 187 defer m.mu.Unlock() 188 if m.current.Range().Len() >= m.switchLimit { 189 m.current.Close() 190 m.current = New(clock.UnixMilli()).AggregateSaves(m.saveAggImpl) 191 } 192 return m.current 193 } 194 195 func (m *Manager) WalkAsc(start int64, f func(*Range) bool) (err error) { 196 for { 197 if start == 0 { 198 // Since we use unix milli as the filename, 0 can't be a legal one. 199 start = 1 200 } 201 next, isLast := m.findNext(start - 1) 202 if isLast { 203 return io.EOF 204 } 205 b, err := m.load(next) 206 if err != nil { 207 return err 208 } 209 if b != nil && !f(b) { 210 return nil 211 } 212 start = next + 1 213 } 214 } 215 216 func (m *Manager) WalkDesc(start int64, f func(*Range) bool) (err error) { 217 for { 218 var b *Range 219 220 prev, isFirst := m.findPrev(start + 1) 221 if isFirst { 222 if m.Event.OnMissing != nil { 223 b, err = m.Event.OnMissing(start + 1) 224 goto LOADED 225 } 226 return io.EOF 227 } 228 b, err = m.load(prev) 229 230 LOADED: 231 if err != nil { 232 return err 233 } 234 if b != nil && !f(b) { 235 return nil 236 } 237 start = prev - 1 238 } 239 } 240 241 func (m *Manager) MultiWalkDesc(start int64, f func(*Range) bool) (err error) { 242 w := runtime.NumCPU() 243 for { 244 var starts []int64 245 for i := 0; i < w; i++ { 246 prev, isFirst := m.findPrev(start + 1) 247 if isFirst { 248 break 249 } 250 starts = append(starts, prev) 251 start = prev - 1 252 } 253 254 if len(starts) == 0 { 255 return io.EOF 256 } 257 258 var exited bool 259 var outErr error 260 var wg sync.WaitGroup 261 wg.Add(len(starts)) 262 for _, s := range starts { 263 go func(s int64) { 264 defer wg.Done() 265 b, err := m.load(s) 266 if err != nil { 267 exited, outErr = true, err 268 return 269 } 270 if b != nil && !f(b) { 271 exited = true 272 } 273 }(s) 274 } 275 wg.Wait() 276 if exited { 277 return outErr 278 } 279 } 280 } 281 282 func (m *Manager) String() string { 283 return fmt.Sprintf("files: %d, saver: %.1f, cache: %d(%db)", 284 len(m.dirFiles), m.current.Metrics(), m.cache.Len(), m.cache.curWeight) 285 } 286 287 func (m *Manager) CollectSimple(dedup interface{ Add(Key) bool }, vs Values, n int) (res []KeyIdScore, jms []JoinMetrics) { 288 m.WalkDesc(clock.UnixMilli(), func(b *Range) bool { 289 jm := b.Join(vs, -1, true, func(kis KeyIdScore) bool { 290 if dedup.Add(kis.Key) { 291 res = append(res, kis) 292 } 293 return len(res) < n 294 }) 295 jms = append(jms, jm) 296 return len(res) < n 297 }) 298 return 299 }