github.com/vipernet-xyz/tendermint-core@v0.32.0/libs/autofile/group.go (about) 1 package autofile 2 3 import ( 4 "bufio" 5 "errors" 6 "fmt" 7 "io" 8 "os" 9 "path/filepath" 10 "regexp" 11 "strconv" 12 "strings" 13 "sync" 14 "time" 15 16 "github.com/tendermint/tendermint/libs/service" 17 ) 18 19 const ( 20 defaultGroupCheckDuration = 5000 * time.Millisecond 21 defaultHeadSizeLimit = 10 * 1024 * 1024 // 10MB 22 defaultTotalSizeLimit = 1 * 1024 * 1024 * 1024 // 1GB 23 maxFilesToRemove = 4 // needs to be greater than 1 24 ) 25 26 /* 27 You can open a Group to keep restrictions on an AutoFile, like 28 the maximum size of each chunk, and/or the total amount of bytes 29 stored in the group. 30 31 The first file to be written in the Group.Dir is the head file. 32 33 Dir/ 34 - <HeadPath> 35 36 Once the Head file reaches the size limit, it will be rotated. 37 38 Dir/ 39 - <HeadPath>.000 // First rolled file 40 - <HeadPath> // New head path, starts empty. 41 // The implicit index is 001. 42 43 As more files are written, the index numbers grow... 44 45 Dir/ 46 - <HeadPath>.000 // First rolled file 47 - <HeadPath>.001 // Second rolled file 48 - ... 49 - <HeadPath> // New head path 50 51 The Group can also be used to binary-search for some line, 52 assuming that marker lines are written occasionally. 53 */ 54 type Group struct { 55 service.BaseService 56 57 ID string 58 Head *AutoFile // The head AutoFile to write to 59 headBuf *bufio.Writer 60 Dir string // Directory that contains .Head 61 ticker *time.Ticker 62 mtx sync.Mutex 63 headSizeLimit int64 64 totalSizeLimit int64 65 groupCheckDuration time.Duration 66 minIndex int // Includes head 67 maxIndex int // Includes head, where Head will move to 68 69 // close this when the processTicks routine is done. 70 // this ensures we can cleanup the dir after calling Stop 71 // and the routine won't be trying to access it anymore 72 doneProcessTicks chan struct{} 73 74 // TODO: When we start deleting files, we need to start tracking GroupReaders 75 // and their dependencies. 76 } 77 78 // OpenGroup creates a new Group with head at headPath. It returns an error if 79 // it fails to open head file. 80 func OpenGroup(headPath string, groupOptions ...func(*Group)) (*Group, error) { 81 dir, err := filepath.Abs(filepath.Dir(headPath)) 82 if err != nil { 83 return nil, err 84 } 85 head, err := OpenAutoFile(headPath) 86 if err != nil { 87 return nil, err 88 } 89 90 g := &Group{ 91 ID: "group:" + head.ID, 92 Head: head, 93 headBuf: bufio.NewWriterSize(head, 4096*10), 94 Dir: dir, 95 headSizeLimit: defaultHeadSizeLimit, 96 totalSizeLimit: defaultTotalSizeLimit, 97 groupCheckDuration: defaultGroupCheckDuration, 98 minIndex: 0, 99 maxIndex: 0, 100 doneProcessTicks: make(chan struct{}), 101 } 102 103 for _, option := range groupOptions { 104 option(g) 105 } 106 107 g.BaseService = *service.NewBaseService(nil, "Group", g) 108 109 gInfo := g.readGroupInfo() 110 g.minIndex = gInfo.MinIndex 111 g.maxIndex = gInfo.MaxIndex 112 return g, nil 113 } 114 115 // GroupCheckDuration allows you to overwrite default groupCheckDuration. 116 func GroupCheckDuration(duration time.Duration) func(*Group) { 117 return func(g *Group) { 118 g.groupCheckDuration = duration 119 } 120 } 121 122 // GroupHeadSizeLimit allows you to overwrite default head size limit - 10MB. 123 func GroupHeadSizeLimit(limit int64) func(*Group) { 124 return func(g *Group) { 125 g.headSizeLimit = limit 126 } 127 } 128 129 // GroupTotalSizeLimit allows you to overwrite default total size limit of the group - 1GB. 130 func GroupTotalSizeLimit(limit int64) func(*Group) { 131 return func(g *Group) { 132 g.totalSizeLimit = limit 133 } 134 } 135 136 // OnStart implements service.Service by starting the goroutine that checks file 137 // and group limits. 138 func (g *Group) OnStart() error { 139 g.ticker = time.NewTicker(g.groupCheckDuration) 140 go g.processTicks() 141 return nil 142 } 143 144 // OnStop implements service.Service by stopping the goroutine described above. 145 // NOTE: g.Head must be closed separately using Close. 146 func (g *Group) OnStop() { 147 g.ticker.Stop() 148 g.FlushAndSync() 149 } 150 151 // Wait blocks until all internal goroutines are finished. Supposed to be 152 // called after Stop. 153 func (g *Group) Wait() { 154 // wait for processTicks routine to finish 155 <-g.doneProcessTicks 156 } 157 158 // Close closes the head file. The group must be stopped by this moment. 159 func (g *Group) Close() { 160 g.FlushAndSync() 161 162 g.mtx.Lock() 163 _ = g.Head.closeFile() 164 g.mtx.Unlock() 165 } 166 167 // HeadSizeLimit returns the current head size limit. 168 func (g *Group) HeadSizeLimit() int64 { 169 g.mtx.Lock() 170 defer g.mtx.Unlock() 171 return g.headSizeLimit 172 } 173 174 // TotalSizeLimit returns total size limit of the group. 175 func (g *Group) TotalSizeLimit() int64 { 176 g.mtx.Lock() 177 defer g.mtx.Unlock() 178 return g.totalSizeLimit 179 } 180 181 // MaxIndex returns index of the last file in the group. 182 func (g *Group) MaxIndex() int { 183 g.mtx.Lock() 184 defer g.mtx.Unlock() 185 return g.maxIndex 186 } 187 188 // MinIndex returns index of the first file in the group. 189 func (g *Group) MinIndex() int { 190 g.mtx.Lock() 191 defer g.mtx.Unlock() 192 return g.minIndex 193 } 194 195 // Write writes the contents of p into the current head of the group. It 196 // returns the number of bytes written. If nn < len(p), it also returns an 197 // error explaining why the write is short. 198 // NOTE: Writes are buffered so they don't write synchronously 199 // TODO: Make it halt if space is unavailable 200 func (g *Group) Write(p []byte) (nn int, err error) { 201 g.mtx.Lock() 202 defer g.mtx.Unlock() 203 return g.headBuf.Write(p) 204 } 205 206 // WriteLine writes line into the current head of the group. It also appends "\n". 207 // NOTE: Writes are buffered so they don't write synchronously 208 // TODO: Make it halt if space is unavailable 209 func (g *Group) WriteLine(line string) error { 210 g.mtx.Lock() 211 defer g.mtx.Unlock() 212 _, err := g.headBuf.Write([]byte(line + "\n")) 213 return err 214 } 215 216 // Buffered returns the size of the currently buffered data. 217 func (g *Group) Buffered() int { 218 g.mtx.Lock() 219 defer g.mtx.Unlock() 220 return g.headBuf.Buffered() 221 } 222 223 // FlushAndSync writes any buffered data to the underlying file and commits the 224 // current content of the file to stable storage (fsync). 225 func (g *Group) FlushAndSync() error { 226 g.mtx.Lock() 227 defer g.mtx.Unlock() 228 err := g.headBuf.Flush() 229 if err == nil { 230 err = g.Head.Sync() 231 } 232 return err 233 } 234 235 func (g *Group) processTicks() { 236 defer close(g.doneProcessTicks) 237 for { 238 select { 239 case <-g.ticker.C: 240 g.checkHeadSizeLimit() 241 g.checkTotalSizeLimit() 242 case <-g.Quit(): 243 return 244 } 245 } 246 } 247 248 // NOTE: this function is called manually in tests. 249 func (g *Group) checkHeadSizeLimit() { 250 limit := g.HeadSizeLimit() 251 if limit == 0 { 252 return 253 } 254 size, err := g.Head.Size() 255 if err != nil { 256 g.Logger.Error("Group's head may grow without bound", "head", g.Head.Path, "err", err) 257 return 258 } 259 if size >= limit { 260 g.RotateFile() 261 } 262 } 263 264 func (g *Group) checkTotalSizeLimit() { 265 limit := g.TotalSizeLimit() 266 if limit == 0 { 267 return 268 } 269 270 gInfo := g.readGroupInfo() 271 totalSize := gInfo.TotalSize 272 for i := 0; i < maxFilesToRemove; i++ { 273 index := gInfo.MinIndex + i 274 if totalSize < limit { 275 return 276 } 277 if index == gInfo.MaxIndex { 278 // Special degenerate case, just do nothing. 279 g.Logger.Error("Group's head may grow without bound", "head", g.Head.Path) 280 return 281 } 282 pathToRemove := filePathForIndex(g.Head.Path, index, gInfo.MaxIndex) 283 fInfo, err := os.Stat(pathToRemove) 284 if err != nil { 285 g.Logger.Error("Failed to fetch info for file", "file", pathToRemove) 286 continue 287 } 288 err = os.Remove(pathToRemove) 289 if err != nil { 290 g.Logger.Error("Failed to remove path", "path", pathToRemove) 291 return 292 } 293 totalSize -= fInfo.Size() 294 } 295 } 296 297 // RotateFile causes group to close the current head and assign it some index. 298 // Note it does not create a new head. 299 func (g *Group) RotateFile() { 300 g.mtx.Lock() 301 defer g.mtx.Unlock() 302 303 headPath := g.Head.Path 304 305 if err := g.headBuf.Flush(); err != nil { 306 panic(err) 307 } 308 309 if err := g.Head.Sync(); err != nil { 310 panic(err) 311 } 312 313 if err := g.Head.closeFile(); err != nil { 314 panic(err) 315 } 316 317 indexPath := filePathForIndex(headPath, g.maxIndex, g.maxIndex+1) 318 if err := os.Rename(headPath, indexPath); err != nil { 319 panic(err) 320 } 321 322 g.maxIndex++ 323 } 324 325 // NewReader returns a new group reader. 326 // CONTRACT: Caller must close the returned GroupReader. 327 func (g *Group) NewReader(index int) (*GroupReader, error) { 328 r := newGroupReader(g) 329 err := r.SetIndex(index) 330 if err != nil { 331 return nil, err 332 } 333 return r, nil 334 } 335 336 // GroupInfo holds information about the group. 337 type GroupInfo struct { 338 MinIndex int // index of the first file in the group, including head 339 MaxIndex int // index of the last file in the group, including head 340 TotalSize int64 // total size of the group 341 HeadSize int64 // size of the head 342 } 343 344 // Returns info after scanning all files in g.Head's dir. 345 func (g *Group) ReadGroupInfo() GroupInfo { 346 g.mtx.Lock() 347 defer g.mtx.Unlock() 348 return g.readGroupInfo() 349 } 350 351 // Index includes the head. 352 // CONTRACT: caller should have called g.mtx.Lock 353 func (g *Group) readGroupInfo() GroupInfo { 354 groupDir := filepath.Dir(g.Head.Path) 355 headBase := filepath.Base(g.Head.Path) 356 var minIndex, maxIndex int = -1, -1 357 var totalSize, headSize int64 = 0, 0 358 359 dir, err := os.Open(groupDir) 360 if err != nil { 361 panic(err) 362 } 363 defer dir.Close() 364 fiz, err := dir.Readdir(0) 365 if err != nil { 366 panic(err) 367 } 368 369 // For each file in the directory, filter by pattern 370 for _, fileInfo := range fiz { 371 if fileInfo.Name() == headBase { 372 fileSize := fileInfo.Size() 373 totalSize += fileSize 374 headSize = fileSize 375 continue 376 } else if strings.HasPrefix(fileInfo.Name(), headBase) { 377 fileSize := fileInfo.Size() 378 totalSize += fileSize 379 indexedFilePattern := regexp.MustCompile(`^.+\.([0-9]{3,})$`) 380 submatch := indexedFilePattern.FindSubmatch([]byte(fileInfo.Name())) 381 if len(submatch) != 0 { 382 // Matches 383 fileIndex, err := strconv.Atoi(string(submatch[1])) 384 if err != nil { 385 panic(err) 386 } 387 if maxIndex < fileIndex { 388 maxIndex = fileIndex 389 } 390 if minIndex == -1 || fileIndex < minIndex { 391 minIndex = fileIndex 392 } 393 } 394 } 395 } 396 397 // Now account for the head. 398 if minIndex == -1 { 399 // If there were no numbered files, 400 // then the head is index 0. 401 minIndex, maxIndex = 0, 0 402 } else { 403 // Otherwise, the head file is 1 greater 404 maxIndex++ 405 } 406 return GroupInfo{minIndex, maxIndex, totalSize, headSize} 407 } 408 409 func filePathForIndex(headPath string, index int, maxIndex int) string { 410 if index == maxIndex { 411 return headPath 412 } 413 return fmt.Sprintf("%v.%03d", headPath, index) 414 } 415 416 //-------------------------------------------------------------------------------- 417 418 // GroupReader provides an interface for reading from a Group. 419 type GroupReader struct { 420 *Group 421 mtx sync.Mutex 422 curIndex int 423 curFile *os.File 424 curReader *bufio.Reader 425 curLine []byte 426 } 427 428 func newGroupReader(g *Group) *GroupReader { 429 return &GroupReader{ 430 Group: g, 431 curIndex: 0, 432 curFile: nil, 433 curReader: nil, 434 curLine: nil, 435 } 436 } 437 438 // Close closes the GroupReader by closing the cursor file. 439 func (gr *GroupReader) Close() error { 440 gr.mtx.Lock() 441 defer gr.mtx.Unlock() 442 443 if gr.curReader != nil { 444 err := gr.curFile.Close() 445 gr.curIndex = 0 446 gr.curReader = nil 447 gr.curFile = nil 448 gr.curLine = nil 449 return err 450 } 451 return nil 452 } 453 454 // Read implements io.Reader, reading bytes from the current Reader 455 // incrementing index until enough bytes are read. 456 func (gr *GroupReader) Read(p []byte) (n int, err error) { 457 lenP := len(p) 458 if lenP == 0 { 459 return 0, errors.New("given empty slice") 460 } 461 462 gr.mtx.Lock() 463 defer gr.mtx.Unlock() 464 465 // Open file if not open yet 466 if gr.curReader == nil { 467 if err = gr.openFile(gr.curIndex); err != nil { 468 return 0, err 469 } 470 } 471 472 // Iterate over files until enough bytes are read 473 var nn int 474 for { 475 nn, err = gr.curReader.Read(p[n:]) 476 n += nn 477 switch { 478 case err == io.EOF: 479 if n >= lenP { 480 return n, nil 481 } 482 // Open the next file 483 if err1 := gr.openFile(gr.curIndex + 1); err1 != nil { 484 return n, err1 485 } 486 case err != nil: 487 return n, err 488 case nn == 0: // empty file 489 return n, err 490 } 491 } 492 } 493 494 // IF index > gr.Group.maxIndex, returns io.EOF 495 // CONTRACT: caller should hold gr.mtx 496 func (gr *GroupReader) openFile(index int) error { 497 // Lock on Group to ensure that head doesn't move in the meanwhile. 498 gr.Group.mtx.Lock() 499 defer gr.Group.mtx.Unlock() 500 501 if index > gr.Group.maxIndex { 502 return io.EOF 503 } 504 505 curFilePath := filePathForIndex(gr.Head.Path, index, gr.Group.maxIndex) 506 curFile, err := os.OpenFile(curFilePath, os.O_RDONLY|os.O_CREATE, autoFilePerms) 507 if err != nil { 508 return err 509 } 510 curReader := bufio.NewReader(curFile) 511 512 // Update gr.cur* 513 if gr.curFile != nil { 514 gr.curFile.Close() // TODO return error? 515 } 516 gr.curIndex = index 517 gr.curFile = curFile 518 gr.curReader = curReader 519 gr.curLine = nil 520 return nil 521 } 522 523 // CurIndex returns cursor's file index. 524 func (gr *GroupReader) CurIndex() int { 525 gr.mtx.Lock() 526 defer gr.mtx.Unlock() 527 return gr.curIndex 528 } 529 530 // SetIndex sets the cursor's file index to index by opening a file at this 531 // position. 532 func (gr *GroupReader) SetIndex(index int) error { 533 gr.mtx.Lock() 534 defer gr.mtx.Unlock() 535 return gr.openFile(index) 536 }