github.com/mydexchain/tendermint@v0.0.4/libs/autofile/group.go (about) 1 package autofile 2 3 import ( 4 "bufio" 5 "errors" 6 "fmt" 7 "io" 8 "os" 9 "path/filepath" 10 "regexp" 11 "strconv" 12 "strings" 13 "sync" 14 "time" 15 16 "github.com/mydexchain/tendermint/libs/service" 17 ) 18 19 const ( 20 defaultGroupCheckDuration = 5000 * time.Millisecond 21 defaultHeadSizeLimit = 10 * 1024 * 1024 // 10MB 22 defaultTotalSizeLimit = 1 * 1024 * 1024 * 1024 // 1GB 23 maxFilesToRemove = 4 // needs to be greater than 1 24 ) 25 26 /* 27 You can open a Group to keep restrictions on an AutoFile, like 28 the maximum size of each chunk, and/or the total amount of bytes 29 stored in the group. 30 31 The first file to be written in the Group.Dir is the head file. 32 33 Dir/ 34 - <HeadPath> 35 36 Once the Head file reaches the size limit, it will be rotated. 37 38 Dir/ 39 - <HeadPath>.000 // First rolled file 40 - <HeadPath> // New head path, starts empty. 41 // The implicit index is 001. 42 43 As more files are written, the index numbers grow... 44 45 Dir/ 46 - <HeadPath>.000 // First rolled file 47 - <HeadPath>.001 // Second rolled file 48 - ... 49 - <HeadPath> // New head path 50 51 The Group can also be used to binary-search for some line, 52 assuming that marker lines are written occasionally. 53 */ 54 type Group struct { 55 service.BaseService 56 57 ID string 58 Head *AutoFile // The head AutoFile to write to 59 headBuf *bufio.Writer 60 Dir string // Directory that contains .Head 61 ticker *time.Ticker 62 mtx sync.Mutex 63 headSizeLimit int64 64 totalSizeLimit int64 65 groupCheckDuration time.Duration 66 minIndex int // Includes head 67 maxIndex int // Includes head, where Head will move to 68 69 // close this when the processTicks routine is done. 70 // this ensures we can cleanup the dir after calling Stop 71 // and the routine won't be trying to access it anymore 72 doneProcessTicks chan struct{} 73 74 // TODO: When we start deleting files, we need to start tracking GroupReaders 75 // and their dependencies. 76 } 77 78 // OpenGroup creates a new Group with head at headPath. It returns an error if 79 // it fails to open head file. 80 func OpenGroup(headPath string, groupOptions ...func(*Group)) (*Group, error) { 81 dir, err := filepath.Abs(filepath.Dir(headPath)) 82 if err != nil { 83 return nil, err 84 } 85 head, err := OpenAutoFile(headPath) 86 if err != nil { 87 return nil, err 88 } 89 90 g := &Group{ 91 ID: "group:" + head.ID, 92 Head: head, 93 headBuf: bufio.NewWriterSize(head, 4096*10), 94 Dir: dir, 95 headSizeLimit: defaultHeadSizeLimit, 96 totalSizeLimit: defaultTotalSizeLimit, 97 groupCheckDuration: defaultGroupCheckDuration, 98 minIndex: 0, 99 maxIndex: 0, 100 doneProcessTicks: make(chan struct{}), 101 } 102 103 for _, option := range groupOptions { 104 option(g) 105 } 106 107 g.BaseService = *service.NewBaseService(nil, "Group", g) 108 109 gInfo := g.readGroupInfo() 110 g.minIndex = gInfo.MinIndex 111 g.maxIndex = gInfo.MaxIndex 112 return g, nil 113 } 114 115 // GroupCheckDuration allows you to overwrite default groupCheckDuration. 116 func GroupCheckDuration(duration time.Duration) func(*Group) { 117 return func(g *Group) { 118 g.groupCheckDuration = duration 119 } 120 } 121 122 // GroupHeadSizeLimit allows you to overwrite default head size limit - 10MB. 123 func GroupHeadSizeLimit(limit int64) func(*Group) { 124 return func(g *Group) { 125 g.headSizeLimit = limit 126 } 127 } 128 129 // GroupTotalSizeLimit allows you to overwrite default total size limit of the group - 1GB. 130 func GroupTotalSizeLimit(limit int64) func(*Group) { 131 return func(g *Group) { 132 g.totalSizeLimit = limit 133 } 134 } 135 136 // OnStart implements service.Service by starting the goroutine that checks file 137 // and group limits. 138 func (g *Group) OnStart() error { 139 g.ticker = time.NewTicker(g.groupCheckDuration) 140 go g.processTicks() 141 return nil 142 } 143 144 // OnStop implements service.Service by stopping the goroutine described above. 145 // NOTE: g.Head must be closed separately using Close. 146 func (g *Group) OnStop() { 147 g.ticker.Stop() 148 if err := g.FlushAndSync(); err != nil { 149 g.Logger.Error("Error flushin to disk", "err", err) 150 } 151 } 152 153 // Wait blocks until all internal goroutines are finished. Supposed to be 154 // called after Stop. 155 func (g *Group) Wait() { 156 // wait for processTicks routine to finish 157 <-g.doneProcessTicks 158 } 159 160 // Close closes the head file. The group must be stopped by this moment. 161 func (g *Group) Close() { 162 if err := g.FlushAndSync(); err != nil { 163 g.Logger.Error("Error flushin to disk", "err", err) 164 } 165 166 g.mtx.Lock() 167 _ = g.Head.closeFile() 168 g.mtx.Unlock() 169 } 170 171 // HeadSizeLimit returns the current head size limit. 172 func (g *Group) HeadSizeLimit() int64 { 173 g.mtx.Lock() 174 defer g.mtx.Unlock() 175 return g.headSizeLimit 176 } 177 178 // TotalSizeLimit returns total size limit of the group. 179 func (g *Group) TotalSizeLimit() int64 { 180 g.mtx.Lock() 181 defer g.mtx.Unlock() 182 return g.totalSizeLimit 183 } 184 185 // MaxIndex returns index of the last file in the group. 186 func (g *Group) MaxIndex() int { 187 g.mtx.Lock() 188 defer g.mtx.Unlock() 189 return g.maxIndex 190 } 191 192 // MinIndex returns index of the first file in the group. 193 func (g *Group) MinIndex() int { 194 g.mtx.Lock() 195 defer g.mtx.Unlock() 196 return g.minIndex 197 } 198 199 // Write writes the contents of p into the current head of the group. It 200 // returns the number of bytes written. If nn < len(p), it also returns an 201 // error explaining why the write is short. 202 // NOTE: Writes are buffered so they don't write synchronously 203 // TODO: Make it halt if space is unavailable 204 func (g *Group) Write(p []byte) (nn int, err error) { 205 g.mtx.Lock() 206 defer g.mtx.Unlock() 207 return g.headBuf.Write(p) 208 } 209 210 // WriteLine writes line into the current head of the group. It also appends "\n". 211 // NOTE: Writes are buffered so they don't write synchronously 212 // TODO: Make it halt if space is unavailable 213 func (g *Group) WriteLine(line string) error { 214 g.mtx.Lock() 215 defer g.mtx.Unlock() 216 _, err := g.headBuf.Write([]byte(line + "\n")) 217 return err 218 } 219 220 // Buffered returns the size of the currently buffered data. 221 func (g *Group) Buffered() int { 222 g.mtx.Lock() 223 defer g.mtx.Unlock() 224 return g.headBuf.Buffered() 225 } 226 227 // FlushAndSync writes any buffered data to the underlying file and commits the 228 // current content of the file to stable storage (fsync). 229 func (g *Group) FlushAndSync() error { 230 g.mtx.Lock() 231 defer g.mtx.Unlock() 232 err := g.headBuf.Flush() 233 if err == nil { 234 err = g.Head.Sync() 235 } 236 return err 237 } 238 239 func (g *Group) processTicks() { 240 defer close(g.doneProcessTicks) 241 for { 242 select { 243 case <-g.ticker.C: 244 g.checkHeadSizeLimit() 245 g.checkTotalSizeLimit() 246 case <-g.Quit(): 247 return 248 } 249 } 250 } 251 252 // NOTE: this function is called manually in tests. 253 func (g *Group) checkHeadSizeLimit() { 254 limit := g.HeadSizeLimit() 255 if limit == 0 { 256 return 257 } 258 size, err := g.Head.Size() 259 if err != nil { 260 g.Logger.Error("Group's head may grow without bound", "head", g.Head.Path, "err", err) 261 return 262 } 263 if size >= limit { 264 g.RotateFile() 265 } 266 } 267 268 func (g *Group) checkTotalSizeLimit() { 269 limit := g.TotalSizeLimit() 270 if limit == 0 { 271 return 272 } 273 274 gInfo := g.readGroupInfo() 275 totalSize := gInfo.TotalSize 276 for i := 0; i < maxFilesToRemove; i++ { 277 index := gInfo.MinIndex + i 278 if totalSize < limit { 279 return 280 } 281 if index == gInfo.MaxIndex { 282 // Special degenerate case, just do nothing. 283 g.Logger.Error("Group's head may grow without bound", "head", g.Head.Path) 284 return 285 } 286 pathToRemove := filePathForIndex(g.Head.Path, index, gInfo.MaxIndex) 287 fInfo, err := os.Stat(pathToRemove) 288 if err != nil { 289 g.Logger.Error("Failed to fetch info for file", "file", pathToRemove) 290 continue 291 } 292 err = os.Remove(pathToRemove) 293 if err != nil { 294 g.Logger.Error("Failed to remove path", "path", pathToRemove) 295 return 296 } 297 totalSize -= fInfo.Size() 298 } 299 } 300 301 // RotateFile causes group to close the current head and assign it some index. 302 // Note it does not create a new head. 303 func (g *Group) RotateFile() { 304 g.mtx.Lock() 305 defer g.mtx.Unlock() 306 307 headPath := g.Head.Path 308 309 if err := g.headBuf.Flush(); err != nil { 310 panic(err) 311 } 312 313 if err := g.Head.Sync(); err != nil { 314 panic(err) 315 } 316 317 if err := g.Head.closeFile(); err != nil { 318 panic(err) 319 } 320 321 indexPath := filePathForIndex(headPath, g.maxIndex, g.maxIndex+1) 322 if err := os.Rename(headPath, indexPath); err != nil { 323 panic(err) 324 } 325 326 g.maxIndex++ 327 } 328 329 // NewReader returns a new group reader. 330 // CONTRACT: Caller must close the returned GroupReader. 331 func (g *Group) NewReader(index int) (*GroupReader, error) { 332 r := newGroupReader(g) 333 err := r.SetIndex(index) 334 if err != nil { 335 return nil, err 336 } 337 return r, nil 338 } 339 340 // GroupInfo holds information about the group. 341 type GroupInfo struct { 342 MinIndex int // index of the first file in the group, including head 343 MaxIndex int // index of the last file in the group, including head 344 TotalSize int64 // total size of the group 345 HeadSize int64 // size of the head 346 } 347 348 // Returns info after scanning all files in g.Head's dir. 349 func (g *Group) ReadGroupInfo() GroupInfo { 350 g.mtx.Lock() 351 defer g.mtx.Unlock() 352 return g.readGroupInfo() 353 } 354 355 // Index includes the head. 356 // CONTRACT: caller should have called g.mtx.Lock 357 func (g *Group) readGroupInfo() GroupInfo { 358 groupDir := filepath.Dir(g.Head.Path) 359 headBase := filepath.Base(g.Head.Path) 360 var minIndex, maxIndex int = -1, -1 361 var totalSize, headSize int64 = 0, 0 362 363 dir, err := os.Open(groupDir) 364 if err != nil { 365 panic(err) 366 } 367 defer dir.Close() 368 fiz, err := dir.Readdir(0) 369 if err != nil { 370 panic(err) 371 } 372 373 // For each file in the directory, filter by pattern 374 for _, fileInfo := range fiz { 375 if fileInfo.Name() == headBase { 376 fileSize := fileInfo.Size() 377 totalSize += fileSize 378 headSize = fileSize 379 continue 380 } else if strings.HasPrefix(fileInfo.Name(), headBase) { 381 fileSize := fileInfo.Size() 382 totalSize += fileSize 383 indexedFilePattern := regexp.MustCompile(`^.+\.([0-9]{3,})$`) 384 submatch := indexedFilePattern.FindSubmatch([]byte(fileInfo.Name())) 385 if len(submatch) != 0 { 386 // Matches 387 fileIndex, err := strconv.Atoi(string(submatch[1])) 388 if err != nil { 389 panic(err) 390 } 391 if maxIndex < fileIndex { 392 maxIndex = fileIndex 393 } 394 if minIndex == -1 || fileIndex < minIndex { 395 minIndex = fileIndex 396 } 397 } 398 } 399 } 400 401 // Now account for the head. 402 if minIndex == -1 { 403 // If there were no numbered files, 404 // then the head is index 0. 405 minIndex, maxIndex = 0, 0 406 } else { 407 // Otherwise, the head file is 1 greater 408 maxIndex++ 409 } 410 return GroupInfo{minIndex, maxIndex, totalSize, headSize} 411 } 412 413 func filePathForIndex(headPath string, index int, maxIndex int) string { 414 if index == maxIndex { 415 return headPath 416 } 417 return fmt.Sprintf("%v.%03d", headPath, index) 418 } 419 420 //-------------------------------------------------------------------------------- 421 422 // GroupReader provides an interface for reading from a Group. 423 type GroupReader struct { 424 *Group 425 mtx sync.Mutex 426 curIndex int 427 curFile *os.File 428 curReader *bufio.Reader 429 curLine []byte 430 } 431 432 func newGroupReader(g *Group) *GroupReader { 433 return &GroupReader{ 434 Group: g, 435 curIndex: 0, 436 curFile: nil, 437 curReader: nil, 438 curLine: nil, 439 } 440 } 441 442 // Close closes the GroupReader by closing the cursor file. 443 func (gr *GroupReader) Close() error { 444 gr.mtx.Lock() 445 defer gr.mtx.Unlock() 446 447 if gr.curReader != nil { 448 err := gr.curFile.Close() 449 gr.curIndex = 0 450 gr.curReader = nil 451 gr.curFile = nil 452 gr.curLine = nil 453 return err 454 } 455 return nil 456 } 457 458 // Read implements io.Reader, reading bytes from the current Reader 459 // incrementing index until enough bytes are read. 460 func (gr *GroupReader) Read(p []byte) (n int, err error) { 461 lenP := len(p) 462 if lenP == 0 { 463 return 0, errors.New("given empty slice") 464 } 465 466 gr.mtx.Lock() 467 defer gr.mtx.Unlock() 468 469 // Open file if not open yet 470 if gr.curReader == nil { 471 if err = gr.openFile(gr.curIndex); err != nil { 472 return 0, err 473 } 474 } 475 476 // Iterate over files until enough bytes are read 477 var nn int 478 for { 479 nn, err = gr.curReader.Read(p[n:]) 480 n += nn 481 switch { 482 case err == io.EOF: 483 if n >= lenP { 484 return n, nil 485 } 486 // Open the next file 487 if err1 := gr.openFile(gr.curIndex + 1); err1 != nil { 488 return n, err1 489 } 490 case err != nil: 491 return n, err 492 case nn == 0: // empty file 493 return n, err 494 } 495 } 496 } 497 498 // IF index > gr.Group.maxIndex, returns io.EOF 499 // CONTRACT: caller should hold gr.mtx 500 func (gr *GroupReader) openFile(index int) error { 501 // Lock on Group to ensure that head doesn't move in the meanwhile. 502 gr.Group.mtx.Lock() 503 defer gr.Group.mtx.Unlock() 504 505 if index > gr.Group.maxIndex { 506 return io.EOF 507 } 508 509 curFilePath := filePathForIndex(gr.Head.Path, index, gr.Group.maxIndex) 510 curFile, err := os.OpenFile(curFilePath, os.O_RDONLY|os.O_CREATE, autoFilePerms) 511 if err != nil { 512 return err 513 } 514 curReader := bufio.NewReader(curFile) 515 516 // Update gr.cur* 517 if gr.curFile != nil { 518 gr.curFile.Close() // TODO return error? 519 } 520 gr.curIndex = index 521 gr.curFile = curFile 522 gr.curReader = curReader 523 gr.curLine = nil 524 return nil 525 } 526 527 // CurIndex returns cursor's file index. 528 func (gr *GroupReader) CurIndex() int { 529 gr.mtx.Lock() 530 defer gr.mtx.Unlock() 531 return gr.curIndex 532 } 533 534 // SetIndex sets the cursor's file index to index by opening a file at this 535 // position. 536 func (gr *GroupReader) SetIndex(index int) error { 537 gr.mtx.Lock() 538 defer gr.mtx.Unlock() 539 return gr.openFile(index) 540 }