github.com/ari-anchor/sei-tendermint@v0.0.0-20230519144642-dc826b7b56bb/internal/libs/autofile/group.go (about) 1 package autofile 2 3 import ( 4 "bufio" 5 "context" 6 "errors" 7 "fmt" 8 "io" 9 "os" 10 "path/filepath" 11 "regexp" 12 "strconv" 13 "strings" 14 "sync" 15 "time" 16 17 "github.com/ari-anchor/sei-tendermint/libs/log" 18 "github.com/ari-anchor/sei-tendermint/libs/service" 19 ) 20 21 const ( 22 defaultGroupCheckDuration = 5000 * time.Millisecond 23 defaultHeadSizeLimit = 10 * 1024 * 1024 // 10MB 24 defaultTotalSizeLimit = 1 * 1024 * 1024 * 1024 // 1GB 25 maxFilesToRemove = 4 // needs to be greater than 1 26 ) 27 28 /* 29 You can open a Group to keep restrictions on an AutoFile, like 30 the maximum size of each chunk, and/or the total amount of bytes 31 stored in the group. 32 33 The first file to be written in the Group.Dir is the head file. 34 35 Dir/ 36 - <HeadPath> 37 38 Once the Head file reaches the size limit, it will be rotated. 39 40 Dir/ 41 - <HeadPath>.000 // First rolled file 42 - <HeadPath> // New head path, starts empty. 43 // The implicit index is 001. 44 45 As more files are written, the index numbers grow... 46 47 Dir/ 48 - <HeadPath>.000 // First rolled file 49 - <HeadPath>.001 // Second rolled file 50 - ... 51 - <HeadPath> // New head path 52 53 The Group can also be used to binary-search for some line, 54 assuming that marker lines are written occasionally. 55 */ 56 type Group struct { 57 service.BaseService 58 logger log.Logger 59 60 ID string 61 Head *AutoFile // The head AutoFile to write to 62 headBuf *bufio.Writer 63 Dir string // Directory that contains .Head 64 ticker *time.Ticker 65 mtx sync.Mutex 66 headSizeLimit int64 67 totalSizeLimit int64 68 groupCheckDuration time.Duration 69 minIndex int // Includes head 70 maxIndex int // Includes head, where Head will move to 71 72 // TODO: When we start deleting files, we need to start tracking GroupReaders 73 // and their dependencies. 74 } 75 76 // OpenGroup creates a new Group with head at headPath. It returns an error if 77 // it fails to open head file. 78 func OpenGroup(ctx context.Context, logger log.Logger, headPath string, groupOptions ...func(*Group)) (*Group, error) { 79 dir, err := filepath.Abs(filepath.Dir(headPath)) 80 if err != nil { 81 return nil, err 82 } 83 head, err := OpenAutoFile(ctx, headPath) 84 if err != nil { 85 return nil, err 86 } 87 88 g := &Group{ 89 logger: logger, 90 ID: "group:" + head.ID, 91 Head: head, 92 headBuf: bufio.NewWriterSize(head, 4096*10), 93 Dir: dir, 94 headSizeLimit: defaultHeadSizeLimit, 95 totalSizeLimit: defaultTotalSizeLimit, 96 groupCheckDuration: defaultGroupCheckDuration, 97 minIndex: 0, 98 maxIndex: 0, 99 } 100 101 for _, option := range groupOptions { 102 option(g) 103 } 104 105 g.BaseService = *service.NewBaseService(logger, "Group", g) 106 107 gInfo := g.readGroupInfo() 108 g.minIndex = gInfo.MinIndex 109 g.maxIndex = gInfo.MaxIndex 110 return g, nil 111 } 112 113 // GroupCheckDuration allows you to overwrite default groupCheckDuration. 114 func GroupCheckDuration(duration time.Duration) func(*Group) { 115 return func(g *Group) { 116 g.groupCheckDuration = duration 117 } 118 } 119 120 // GroupHeadSizeLimit allows you to overwrite default head size limit - 10MB. 121 func GroupHeadSizeLimit(limit int64) func(*Group) { 122 return func(g *Group) { 123 g.headSizeLimit = limit 124 } 125 } 126 127 // GroupTotalSizeLimit allows you to overwrite default total size limit of the group - 1GB. 128 func GroupTotalSizeLimit(limit int64) func(*Group) { 129 return func(g *Group) { 130 g.totalSizeLimit = limit 131 } 132 } 133 134 // OnStart implements service.Service by starting the goroutine that checks file 135 // and group limits. 136 func (g *Group) OnStart(ctx context.Context) error { 137 g.ticker = time.NewTicker(g.groupCheckDuration) 138 go g.processTicks(ctx) 139 return nil 140 } 141 142 // OnStop implements service.Service by stopping the goroutine described above. 143 // NOTE: g.Head must be closed separately using Close. 144 func (g *Group) OnStop() { 145 g.ticker.Stop() 146 if err := g.FlushAndSync(); err != nil { 147 g.logger.Error("error flushing to disk", "err", err) 148 } 149 } 150 151 // Close closes the head file. The group must be stopped by this moment. 152 func (g *Group) Close() { 153 if err := g.FlushAndSync(); err != nil { 154 g.logger.Error("error flushing to disk", "err", err) 155 } 156 157 g.mtx.Lock() 158 _ = g.Head.Close() 159 g.mtx.Unlock() 160 } 161 162 // HeadSizeLimit returns the current head size limit. 163 func (g *Group) HeadSizeLimit() int64 { 164 g.mtx.Lock() 165 defer g.mtx.Unlock() 166 return g.headSizeLimit 167 } 168 169 // TotalSizeLimit returns total size limit of the group. 170 func (g *Group) TotalSizeLimit() int64 { 171 g.mtx.Lock() 172 defer g.mtx.Unlock() 173 return g.totalSizeLimit 174 } 175 176 // MaxIndex returns index of the last file in the group. 177 func (g *Group) MaxIndex() int { 178 g.mtx.Lock() 179 defer g.mtx.Unlock() 180 return g.maxIndex 181 } 182 183 // MinIndex returns index of the first file in the group. 184 func (g *Group) MinIndex() int { 185 g.mtx.Lock() 186 defer g.mtx.Unlock() 187 return g.minIndex 188 } 189 190 // Write writes the contents of p into the current head of the group. It 191 // returns the number of bytes written. If nn < len(p), it also returns an 192 // error explaining why the write is short. 193 // NOTE: Writes are buffered so they don't write synchronously 194 // TODO: Make it halt if space is unavailable 195 func (g *Group) Write(p []byte) (nn int, err error) { 196 g.mtx.Lock() 197 defer g.mtx.Unlock() 198 return g.headBuf.Write(p) 199 } 200 201 // WriteLine writes line into the current head of the group. It also appends "\n". 202 // NOTE: Writes are buffered so they don't write synchronously 203 // TODO: Make it halt if space is unavailable 204 func (g *Group) WriteLine(line string) error { 205 g.mtx.Lock() 206 defer g.mtx.Unlock() 207 _, err := g.headBuf.Write([]byte(line + "\n")) 208 return err 209 } 210 211 // Buffered returns the size of the currently buffered data. 212 func (g *Group) Buffered() int { 213 g.mtx.Lock() 214 defer g.mtx.Unlock() 215 return g.headBuf.Buffered() 216 } 217 218 // FlushAndSync writes any buffered data to the underlying file and commits the 219 // current content of the file to stable storage (fsync). 220 func (g *Group) FlushAndSync() error { 221 g.mtx.Lock() 222 defer g.mtx.Unlock() 223 err := g.headBuf.Flush() 224 if err == nil { 225 err = g.Head.Sync() 226 } 227 return err 228 } 229 230 func (g *Group) processTicks(ctx context.Context) { 231 for { 232 select { 233 case <-ctx.Done(): 234 return 235 case <-g.ticker.C: 236 g.checkHeadSizeLimit(ctx) 237 g.checkTotalSizeLimit(ctx) 238 } 239 } 240 } 241 242 // NOTE: this function is called manually in tests. 243 func (g *Group) checkHeadSizeLimit(ctx context.Context) { 244 limit := g.HeadSizeLimit() 245 if limit == 0 { 246 return 247 } 248 size, err := g.Head.Size() 249 if err != nil { 250 g.logger.Error("Group's head may grow without bound", "head", g.Head.Path, "err", err) 251 return 252 } 253 if size >= limit { 254 g.rotateFile(ctx) 255 } 256 } 257 258 func (g *Group) checkTotalSizeLimit(ctx context.Context) { 259 g.mtx.Lock() 260 defer g.mtx.Unlock() 261 262 if err := ctx.Err(); err != nil { 263 return 264 } 265 266 if g.totalSizeLimit == 0 { 267 return 268 } 269 270 gInfo := g.readGroupInfo() 271 totalSize := gInfo.TotalSize 272 for i := 0; i < maxFilesToRemove; i++ { 273 index := gInfo.MinIndex + i 274 if totalSize < g.totalSizeLimit { 275 return 276 } 277 if index == gInfo.MaxIndex { 278 // Special degenerate case, just do nothing. 279 g.logger.Error("Group's head may grow without bound", "head", g.Head.Path) 280 return 281 } 282 283 if ctx.Err() != nil { 284 return 285 } 286 287 pathToRemove := filePathForIndex(g.Head.Path, index, gInfo.MaxIndex) 288 fInfo, err := os.Stat(pathToRemove) 289 if err != nil { 290 g.logger.Error("Failed to fetch info for file", "file", pathToRemove) 291 continue 292 } 293 294 if ctx.Err() != nil { 295 return 296 } 297 298 if err = os.Remove(pathToRemove); err != nil { 299 g.logger.Error("Failed to remove path", "path", pathToRemove) 300 return 301 } 302 totalSize -= fInfo.Size() 303 } 304 } 305 306 // rotateFile causes group to close the current head and assign it 307 // some index. Panics if it encounters an error. 308 func (g *Group) rotateFile(ctx context.Context) { 309 g.mtx.Lock() 310 defer g.mtx.Unlock() 311 312 if err := ctx.Err(); err != nil { 313 return 314 } 315 316 headPath := g.Head.Path 317 318 if err := g.headBuf.Flush(); err != nil { 319 panic(err) 320 } 321 if err := g.Head.Sync(); err != nil { 322 panic(err) 323 } 324 err := g.Head.withLock(func() error { 325 if err := ctx.Err(); err != nil { 326 return err 327 } 328 329 if err := g.Head.unsyncCloseFile(); err != nil { 330 return err 331 } 332 333 indexPath := filePathForIndex(headPath, g.maxIndex, g.maxIndex+1) 334 return os.Rename(headPath, indexPath) 335 }) 336 if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { 337 return 338 } 339 if err != nil { 340 panic(err) 341 } 342 343 g.maxIndex++ 344 } 345 346 // NewReader returns a new group reader. 347 // CONTRACT: Caller must close the returned GroupReader. 348 func (g *Group) NewReader(index int) (*GroupReader, error) { 349 r := newGroupReader(g) 350 err := r.SetIndex(index) 351 if err != nil { 352 return nil, err 353 } 354 return r, nil 355 } 356 357 // GroupInfo holds information about the group. 358 type GroupInfo struct { 359 MinIndex int // index of the first file in the group, including head 360 MaxIndex int // index of the last file in the group, including head 361 TotalSize int64 // total size of the group 362 HeadSize int64 // size of the head 363 } 364 365 // Returns info after scanning all files in g.Head's dir. 366 func (g *Group) ReadGroupInfo() GroupInfo { 367 g.mtx.Lock() 368 defer g.mtx.Unlock() 369 return g.readGroupInfo() 370 } 371 372 // Index includes the head. 373 // CONTRACT: caller should have called g.mtx.Lock 374 func (g *Group) readGroupInfo() GroupInfo { 375 groupDir := filepath.Dir(g.Head.Path) 376 headBase := filepath.Base(g.Head.Path) 377 var minIndex, maxIndex int = -1, -1 378 var totalSize, headSize int64 = 0, 0 379 380 dir, err := os.Open(groupDir) 381 if err != nil { 382 panic(err) 383 } 384 defer dir.Close() 385 fiz, err := dir.Readdir(0) 386 if err != nil { 387 panic(err) 388 } 389 390 // For each file in the directory, filter by pattern 391 for _, fileInfo := range fiz { 392 if fileInfo.Name() == headBase { 393 fileSize := fileInfo.Size() 394 totalSize += fileSize 395 headSize = fileSize 396 continue 397 } else if strings.HasPrefix(fileInfo.Name(), headBase) { 398 fileSize := fileInfo.Size() 399 totalSize += fileSize 400 indexedFilePattern := regexp.MustCompile(`^.+\.([0-9]{3,})$`) 401 submatch := indexedFilePattern.FindSubmatch([]byte(fileInfo.Name())) 402 if len(submatch) != 0 { 403 // Matches 404 fileIndex, err := strconv.Atoi(string(submatch[1])) 405 if err != nil { 406 panic(err) 407 } 408 if maxIndex < fileIndex { 409 maxIndex = fileIndex 410 } 411 if minIndex == -1 || fileIndex < minIndex { 412 minIndex = fileIndex 413 } 414 } 415 } 416 } 417 418 // Now account for the head. 419 if minIndex == -1 { 420 // If there were no numbered files, 421 // then the head is index 0. 422 minIndex, maxIndex = 0, 0 423 } else { 424 // Otherwise, the head file is 1 greater 425 maxIndex++ 426 } 427 return GroupInfo{minIndex, maxIndex, totalSize, headSize} 428 } 429 430 func filePathForIndex(headPath string, index int, maxIndex int) string { 431 if index == maxIndex { 432 return headPath 433 } 434 return fmt.Sprintf("%v.%03d", headPath, index) 435 } 436 437 //-------------------------------------------------------------------------------- 438 439 // GroupReader provides an interface for reading from a Group. 440 type GroupReader struct { 441 *Group 442 mtx sync.Mutex 443 curIndex int 444 curFile *os.File 445 curReader *bufio.Reader 446 curLine []byte 447 } 448 449 func newGroupReader(g *Group) *GroupReader { 450 return &GroupReader{ 451 Group: g, 452 curIndex: 0, 453 curFile: nil, 454 curReader: nil, 455 curLine: nil, 456 } 457 } 458 459 // Close closes the GroupReader by closing the cursor file. 460 func (gr *GroupReader) Close() error { 461 gr.mtx.Lock() 462 defer gr.mtx.Unlock() 463 464 if gr.curReader != nil { 465 err := gr.curFile.Close() 466 gr.curIndex = 0 467 gr.curReader = nil 468 gr.curFile = nil 469 gr.curLine = nil 470 return err 471 } 472 return nil 473 } 474 475 // Read implements io.Reader, reading bytes from the current Reader 476 // incrementing index until enough bytes are read. 477 func (gr *GroupReader) Read(p []byte) (n int, err error) { 478 lenP := len(p) 479 if lenP == 0 { 480 return 0, errors.New("given empty slice") 481 } 482 483 gr.mtx.Lock() 484 defer gr.mtx.Unlock() 485 486 // Open file if not open yet 487 if gr.curReader == nil { 488 if err = gr.openFile(gr.curIndex); err != nil { 489 return 0, err 490 } 491 } 492 493 // Iterate over files until enough bytes are read 494 var nn int 495 for { 496 nn, err = gr.curReader.Read(p[n:]) 497 n += nn 498 switch { 499 case err == io.EOF: 500 if n >= lenP { 501 return n, nil 502 } 503 // Open the next file 504 if err1 := gr.openFile(gr.curIndex + 1); err1 != nil { 505 return n, err1 506 } 507 case err != nil: 508 return n, err 509 case nn == 0: // empty file 510 return n, err 511 } 512 } 513 } 514 515 // IF index > gr.Group.maxIndex, returns io.EOF 516 // CONTRACT: caller should hold gr.mtx 517 func (gr *GroupReader) openFile(index int) error { 518 // Lock on Group to ensure that head doesn't move in the meanwhile. 519 gr.Group.mtx.Lock() 520 defer gr.Group.mtx.Unlock() 521 522 if index > gr.Group.maxIndex { 523 return io.EOF 524 } 525 526 curFilePath := filePathForIndex(gr.Head.Path, index, gr.Group.maxIndex) 527 curFile, err := os.OpenFile(curFilePath, os.O_RDONLY|os.O_CREATE, autoFilePerms) 528 if err != nil { 529 return err 530 } 531 curReader := bufio.NewReader(curFile) 532 533 // Update gr.cur* 534 if gr.curFile != nil { 535 gr.curFile.Close() // TODO return error? 536 } 537 gr.curIndex = index 538 gr.curFile = curFile 539 gr.curReader = curReader 540 gr.curLine = nil 541 return nil 542 } 543 544 // CurIndex returns cursor's file index. 545 func (gr *GroupReader) CurIndex() int { 546 gr.mtx.Lock() 547 defer gr.mtx.Unlock() 548 return gr.curIndex 549 } 550 551 // SetIndex sets the cursor's file index to index by opening a file at this 552 // position. 553 func (gr *GroupReader) SetIndex(index int) error { 554 gr.mtx.Lock() 555 defer gr.mtx.Unlock() 556 return gr.openFile(index) 557 }