github.com/evdatsion/aphelion-dpos-bft@v0.32.1/libs/autofile/group.go (about) 1 package autofile 2 3 import ( 4 "bufio" 5 "errors" 6 "fmt" 7 "io" 8 "os" 9 "path" 10 "path/filepath" 11 "regexp" 12 "strconv" 13 "strings" 14 "sync" 15 "time" 16 17 cmn "github.com/evdatsion/aphelion-dpos-bft/libs/common" 18 ) 19 20 const ( 21 defaultGroupCheckDuration = 5000 * time.Millisecond 22 defaultHeadSizeLimit = 10 * 1024 * 1024 // 10MB 23 defaultTotalSizeLimit = 1 * 1024 * 1024 * 1024 // 1GB 24 maxFilesToRemove = 4 // needs to be greater than 1 25 ) 26 27 /* 28 You can open a Group to keep restrictions on an AutoFile, like 29 the maximum size of each chunk, and/or the total amount of bytes 30 stored in the group. 31 32 The first file to be written in the Group.Dir is the head file. 33 34 Dir/ 35 - <HeadPath> 36 37 Once the Head file reaches the size limit, it will be rotated. 38 39 Dir/ 40 - <HeadPath>.000 // First rolled file 41 - <HeadPath> // New head path, starts empty. 42 // The implicit index is 001. 43 44 As more files are written, the index numbers grow... 45 46 Dir/ 47 - <HeadPath>.000 // First rolled file 48 - <HeadPath>.001 // Second rolled file 49 - ... 50 - <HeadPath> // New head path 51 52 The Group can also be used to binary-search for some line, 53 assuming that marker lines are written occasionally. 54 */ 55 type Group struct { 56 cmn.BaseService 57 58 ID string 59 Head *AutoFile // The head AutoFile to write to 60 headBuf *bufio.Writer 61 Dir string // Directory that contains .Head 62 ticker *time.Ticker 63 mtx sync.Mutex 64 headSizeLimit int64 65 totalSizeLimit int64 66 groupCheckDuration time.Duration 67 minIndex int // Includes head 68 maxIndex int // Includes head, where Head will move to 69 70 // close this when the processTicks routine is done. 71 // this ensures we can cleanup the dir after calling Stop 72 // and the routine won't be trying to access it anymore 73 doneProcessTicks chan struct{} 74 75 // TODO: When we start deleting files, we need to start tracking GroupReaders 76 // and their dependencies. 77 } 78 79 // OpenGroup creates a new Group with head at headPath. It returns an error if 80 // it fails to open head file. 81 func OpenGroup(headPath string, groupOptions ...func(*Group)) (g *Group, err error) { 82 dir := path.Dir(headPath) 83 head, err := OpenAutoFile(headPath) 84 if err != nil { 85 return nil, err 86 } 87 88 g = &Group{ 89 ID: "group:" + head.ID, 90 Head: head, 91 headBuf: bufio.NewWriterSize(head, 4096*10), 92 Dir: dir, 93 headSizeLimit: defaultHeadSizeLimit, 94 totalSizeLimit: defaultTotalSizeLimit, 95 groupCheckDuration: defaultGroupCheckDuration, 96 minIndex: 0, 97 maxIndex: 0, 98 doneProcessTicks: make(chan struct{}), 99 } 100 101 for _, option := range groupOptions { 102 option(g) 103 } 104 105 g.BaseService = *cmn.NewBaseService(nil, "Group", g) 106 107 gInfo := g.readGroupInfo() 108 g.minIndex = gInfo.MinIndex 109 g.maxIndex = gInfo.MaxIndex 110 return 111 } 112 113 // GroupCheckDuration allows you to overwrite default groupCheckDuration. 114 func GroupCheckDuration(duration time.Duration) func(*Group) { 115 return func(g *Group) { 116 g.groupCheckDuration = duration 117 } 118 } 119 120 // GroupHeadSizeLimit allows you to overwrite default head size limit - 10MB. 121 func GroupHeadSizeLimit(limit int64) func(*Group) { 122 return func(g *Group) { 123 g.headSizeLimit = limit 124 } 125 } 126 127 // GroupTotalSizeLimit allows you to overwrite default total size limit of the group - 1GB. 128 func GroupTotalSizeLimit(limit int64) func(*Group) { 129 return func(g *Group) { 130 g.totalSizeLimit = limit 131 } 132 } 133 134 // OnStart implements cmn.Service by starting the goroutine that checks file 135 // and group limits. 136 func (g *Group) OnStart() error { 137 g.ticker = time.NewTicker(g.groupCheckDuration) 138 go g.processTicks() 139 return nil 140 } 141 142 // OnStop implements cmn.Service by stopping the goroutine described above. 143 // NOTE: g.Head must be closed separately using Close. 144 func (g *Group) OnStop() { 145 g.ticker.Stop() 146 g.FlushAndSync() 147 } 148 149 // Wait blocks until all internal goroutines are finished. Supposed to be 150 // called after Stop. 151 func (g *Group) Wait() { 152 // wait for processTicks routine to finish 153 <-g.doneProcessTicks 154 } 155 156 // Close closes the head file. The group must be stopped by this moment. 157 func (g *Group) Close() { 158 g.FlushAndSync() 159 160 g.mtx.Lock() 161 _ = g.Head.closeFile() 162 g.mtx.Unlock() 163 } 164 165 // HeadSizeLimit returns the current head size limit. 166 func (g *Group) HeadSizeLimit() int64 { 167 g.mtx.Lock() 168 defer g.mtx.Unlock() 169 return g.headSizeLimit 170 } 171 172 // TotalSizeLimit returns total size limit of the group. 173 func (g *Group) TotalSizeLimit() int64 { 174 g.mtx.Lock() 175 defer g.mtx.Unlock() 176 return g.totalSizeLimit 177 } 178 179 // MaxIndex returns index of the last file in the group. 180 func (g *Group) MaxIndex() int { 181 g.mtx.Lock() 182 defer g.mtx.Unlock() 183 return g.maxIndex 184 } 185 186 // MinIndex returns index of the first file in the group. 187 func (g *Group) MinIndex() int { 188 g.mtx.Lock() 189 defer g.mtx.Unlock() 190 return g.minIndex 191 } 192 193 // Write writes the contents of p into the current head of the group. It 194 // returns the number of bytes written. If nn < len(p), it also returns an 195 // error explaining why the write is short. 196 // NOTE: Writes are buffered so they don't write synchronously 197 // TODO: Make it halt if space is unavailable 198 func (g *Group) Write(p []byte) (nn int, err error) { 199 g.mtx.Lock() 200 defer g.mtx.Unlock() 201 return g.headBuf.Write(p) 202 } 203 204 // WriteLine writes line into the current head of the group. It also appends "\n". 205 // NOTE: Writes are buffered so they don't write synchronously 206 // TODO: Make it halt if space is unavailable 207 func (g *Group) WriteLine(line string) error { 208 g.mtx.Lock() 209 defer g.mtx.Unlock() 210 _, err := g.headBuf.Write([]byte(line + "\n")) 211 return err 212 } 213 214 // Buffered returns the size of the currently buffered data. 215 func (g *Group) Buffered() int { 216 g.mtx.Lock() 217 defer g.mtx.Unlock() 218 return g.headBuf.Buffered() 219 } 220 221 // FlushAndSync writes any buffered data to the underlying file and commits the 222 // current content of the file to stable storage (fsync). 223 func (g *Group) FlushAndSync() error { 224 g.mtx.Lock() 225 defer g.mtx.Unlock() 226 err := g.headBuf.Flush() 227 if err == nil { 228 err = g.Head.Sync() 229 } 230 return err 231 } 232 233 func (g *Group) processTicks() { 234 defer close(g.doneProcessTicks) 235 for { 236 select { 237 case <-g.ticker.C: 238 g.checkHeadSizeLimit() 239 g.checkTotalSizeLimit() 240 case <-g.Quit(): 241 return 242 } 243 } 244 } 245 246 // NOTE: this function is called manually in tests. 247 func (g *Group) checkHeadSizeLimit() { 248 limit := g.HeadSizeLimit() 249 if limit == 0 { 250 return 251 } 252 size, err := g.Head.Size() 253 if err != nil { 254 g.Logger.Error("Group's head may grow without bound", "head", g.Head.Path, "err", err) 255 return 256 } 257 if size >= limit { 258 g.RotateFile() 259 } 260 } 261 262 func (g *Group) checkTotalSizeLimit() { 263 limit := g.TotalSizeLimit() 264 if limit == 0 { 265 return 266 } 267 268 gInfo := g.readGroupInfo() 269 totalSize := gInfo.TotalSize 270 for i := 0; i < maxFilesToRemove; i++ { 271 index := gInfo.MinIndex + i 272 if totalSize < limit { 273 return 274 } 275 if index == gInfo.MaxIndex { 276 // Special degenerate case, just do nothing. 277 g.Logger.Error("Group's head may grow without bound", "head", g.Head.Path) 278 return 279 } 280 pathToRemove := filePathForIndex(g.Head.Path, index, gInfo.MaxIndex) 281 fInfo, err := os.Stat(pathToRemove) 282 if err != nil { 283 g.Logger.Error("Failed to fetch info for file", "file", pathToRemove) 284 continue 285 } 286 err = os.Remove(pathToRemove) 287 if err != nil { 288 g.Logger.Error("Failed to remove path", "path", pathToRemove) 289 return 290 } 291 totalSize -= fInfo.Size() 292 } 293 } 294 295 // RotateFile causes group to close the current head and assign it some index. 296 // Note it does not create a new head. 297 func (g *Group) RotateFile() { 298 g.mtx.Lock() 299 defer g.mtx.Unlock() 300 301 headPath := g.Head.Path 302 303 if err := g.headBuf.Flush(); err != nil { 304 panic(err) 305 } 306 307 if err := g.Head.Sync(); err != nil { 308 panic(err) 309 } 310 311 if err := g.Head.closeFile(); err != nil { 312 panic(err) 313 } 314 315 indexPath := filePathForIndex(headPath, g.maxIndex, g.maxIndex+1) 316 if err := os.Rename(headPath, indexPath); err != nil { 317 panic(err) 318 } 319 320 g.maxIndex++ 321 } 322 323 // NewReader returns a new group reader. 324 // CONTRACT: Caller must close the returned GroupReader. 325 func (g *Group) NewReader(index int) (*GroupReader, error) { 326 r := newGroupReader(g) 327 err := r.SetIndex(index) 328 if err != nil { 329 return nil, err 330 } 331 return r, nil 332 } 333 334 // GroupInfo holds information about the group. 335 type GroupInfo struct { 336 MinIndex int // index of the first file in the group, including head 337 MaxIndex int // index of the last file in the group, including head 338 TotalSize int64 // total size of the group 339 HeadSize int64 // size of the head 340 } 341 342 // Returns info after scanning all files in g.Head's dir. 343 func (g *Group) ReadGroupInfo() GroupInfo { 344 g.mtx.Lock() 345 defer g.mtx.Unlock() 346 return g.readGroupInfo() 347 } 348 349 // Index includes the head. 350 // CONTRACT: caller should have called g.mtx.Lock 351 func (g *Group) readGroupInfo() GroupInfo { 352 groupDir := filepath.Dir(g.Head.Path) 353 headBase := filepath.Base(g.Head.Path) 354 var minIndex, maxIndex int = -1, -1 355 var totalSize, headSize int64 = 0, 0 356 357 dir, err := os.Open(groupDir) 358 if err != nil { 359 panic(err) 360 } 361 defer dir.Close() 362 fiz, err := dir.Readdir(0) 363 if err != nil { 364 panic(err) 365 } 366 367 // For each file in the directory, filter by pattern 368 for _, fileInfo := range fiz { 369 if fileInfo.Name() == headBase { 370 fileSize := fileInfo.Size() 371 totalSize += fileSize 372 headSize = fileSize 373 continue 374 } else if strings.HasPrefix(fileInfo.Name(), headBase) { 375 fileSize := fileInfo.Size() 376 totalSize += fileSize 377 indexedFilePattern := regexp.MustCompile(`^.+\.([0-9]{3,})$`) 378 submatch := indexedFilePattern.FindSubmatch([]byte(fileInfo.Name())) 379 if len(submatch) != 0 { 380 // Matches 381 fileIndex, err := strconv.Atoi(string(submatch[1])) 382 if err != nil { 383 panic(err) 384 } 385 if maxIndex < fileIndex { 386 maxIndex = fileIndex 387 } 388 if minIndex == -1 || fileIndex < minIndex { 389 minIndex = fileIndex 390 } 391 } 392 } 393 } 394 395 // Now account for the head. 396 if minIndex == -1 { 397 // If there were no numbered files, 398 // then the head is index 0. 399 minIndex, maxIndex = 0, 0 400 } else { 401 // Otherwise, the head file is 1 greater 402 maxIndex++ 403 } 404 return GroupInfo{minIndex, maxIndex, totalSize, headSize} 405 } 406 407 func filePathForIndex(headPath string, index int, maxIndex int) string { 408 if index == maxIndex { 409 return headPath 410 } 411 return fmt.Sprintf("%v.%03d", headPath, index) 412 } 413 414 //-------------------------------------------------------------------------------- 415 416 // GroupReader provides an interface for reading from a Group. 417 type GroupReader struct { 418 *Group 419 mtx sync.Mutex 420 curIndex int 421 curFile *os.File 422 curReader *bufio.Reader 423 curLine []byte 424 } 425 426 func newGroupReader(g *Group) *GroupReader { 427 return &GroupReader{ 428 Group: g, 429 curIndex: 0, 430 curFile: nil, 431 curReader: nil, 432 curLine: nil, 433 } 434 } 435 436 // Close closes the GroupReader by closing the cursor file. 437 func (gr *GroupReader) Close() error { 438 gr.mtx.Lock() 439 defer gr.mtx.Unlock() 440 441 if gr.curReader != nil { 442 err := gr.curFile.Close() 443 gr.curIndex = 0 444 gr.curReader = nil 445 gr.curFile = nil 446 gr.curLine = nil 447 return err 448 } 449 return nil 450 } 451 452 // Read implements io.Reader, reading bytes from the current Reader 453 // incrementing index until enough bytes are read. 454 func (gr *GroupReader) Read(p []byte) (n int, err error) { 455 lenP := len(p) 456 if lenP == 0 { 457 return 0, errors.New("given empty slice") 458 } 459 460 gr.mtx.Lock() 461 defer gr.mtx.Unlock() 462 463 // Open file if not open yet 464 if gr.curReader == nil { 465 if err = gr.openFile(gr.curIndex); err != nil { 466 return 0, err 467 } 468 } 469 470 // Iterate over files until enough bytes are read 471 var nn int 472 for { 473 nn, err = gr.curReader.Read(p[n:]) 474 n += nn 475 if err == io.EOF { 476 if n >= lenP { 477 return n, nil 478 } 479 // Open the next file 480 if err1 := gr.openFile(gr.curIndex + 1); err1 != nil { 481 return n, err1 482 } 483 } else if err != nil { 484 return n, err 485 } else if nn == 0 { // empty file 486 return n, err 487 } 488 } 489 } 490 491 // IF index > gr.Group.maxIndex, returns io.EOF 492 // CONTRACT: caller should hold gr.mtx 493 func (gr *GroupReader) openFile(index int) error { 494 // Lock on Group to ensure that head doesn't move in the meanwhile. 495 gr.Group.mtx.Lock() 496 defer gr.Group.mtx.Unlock() 497 498 if index > gr.Group.maxIndex { 499 return io.EOF 500 } 501 502 curFilePath := filePathForIndex(gr.Head.Path, index, gr.Group.maxIndex) 503 curFile, err := os.OpenFile(curFilePath, os.O_RDONLY|os.O_CREATE, autoFilePerms) 504 if err != nil { 505 return err 506 } 507 curReader := bufio.NewReader(curFile) 508 509 // Update gr.cur* 510 if gr.curFile != nil { 511 gr.curFile.Close() // TODO return error? 512 } 513 gr.curIndex = index 514 gr.curFile = curFile 515 gr.curReader = curReader 516 gr.curLine = nil 517 return nil 518 } 519 520 // CurIndex returns cursor's file index. 521 func (gr *GroupReader) CurIndex() int { 522 gr.mtx.Lock() 523 defer gr.mtx.Unlock() 524 return gr.curIndex 525 } 526 527 // SetIndex sets the cursor's file index to index by opening a file at this 528 // position. 529 func (gr *GroupReader) SetIndex(index int) error { 530 gr.mtx.Lock() 531 defer gr.mtx.Unlock() 532 return gr.openFile(index) 533 }