github.com/gnolang/gno@v0.0.0-20240520182011-228e9d0192ce/tm2/pkg/autofile/group.go (about) 1 package autofile 2 3 import ( 4 "bufio" 5 "errors" 6 "fmt" 7 "io" 8 "os" 9 "path" 10 "path/filepath" 11 "regexp" 12 "strconv" 13 "strings" 14 "sync" 15 "time" 16 17 "github.com/gnolang/gno/tm2/pkg/service" 18 ) 19 20 const ( 21 defaultGroupCheckDuration = 5000 * time.Millisecond 22 defaultHeadSizeLimit = 10 * 1024 * 1024 // 10MB 23 defaultTotalSizeLimit = 1 * 1024 * 1024 * 1024 // 1GB 24 maxFilesToRemove = 4 // needs to be greater than 1 25 ) 26 27 /* 28 You can open a Group to keep restrictions on an AutoFile, like 29 the maximum size of each chunk, and/or the total amount of bytes 30 stored in the group. 31 32 The first file to be written in the Group.Dir is the head file. 33 34 Dir/ 35 - <HeadPath> 36 37 Once the Head file reaches the size limit, it will be rotated. 38 39 Dir/ 40 - <HeadPath>.000 // First rolled file 41 - <HeadPath> // New head path, starts empty. 42 // The implicit index is 001. 43 44 As more files are written, the index numbers grow... 45 46 Dir/ 47 - <HeadPath>.000 // First rolled file 48 - <HeadPath>.001 // Second rolled file 49 - ... 50 - <HeadPath> // New head path 51 52 The Group can also be used to binary-search for some line, 53 assuming that marker lines are written occasionally. 54 */ 55 type Group struct { 56 service.BaseService 57 58 ID string 59 Head *AutoFile // The head AutoFile to write to 60 headBuf *bufio.Writer 61 Dir string // Directory that contains .Head 62 63 mtx sync.Mutex 64 headSizeLimit int64 65 totalSizeLimit int64 66 info GroupInfo 67 68 // TODO: When we start deleting files, we need to start tracking GroupReaders 69 // and their dependencies. 70 } 71 72 // OpenGroup creates a new Group with head at headPath. It returns an error if 73 // it fails to open head file. 74 func OpenGroup(headPath string, groupOptions ...func(*Group)) (g *Group, err error) { 75 dir := path.Dir(headPath) 76 head, err := OpenAutoFile(headPath) 77 if err != nil { 78 return nil, err 79 } 80 81 g = &Group{ 82 ID: "group:" + head.ID, 83 Head: head, 84 headBuf: bufio.NewWriterSize(head, 4096*10), 85 Dir: dir, 86 headSizeLimit: defaultHeadSizeLimit, 87 totalSizeLimit: defaultTotalSizeLimit, 88 info: GroupInfo{ 89 MinIndex: 0, 90 MaxIndex: 0, 91 TotalSize: 0, 92 HeadSize: 0, 93 }, 94 } 95 96 for _, option := range groupOptions { 97 option(g) 98 } 99 100 g.BaseService = *service.NewBaseService(nil, "Group", g) 101 g.info = g.readGroupInfo() 102 return 103 } 104 105 // GroupHeadSizeLimit allows you to overwrite default head size limit - 10MB. 106 func GroupHeadSizeLimit(limit int64) func(*Group) { 107 return func(g *Group) { 108 g.headSizeLimit = limit 109 } 110 } 111 112 // GroupTotalSizeLimit allows you to overwrite default total size limit of the group - 1GB. 113 func GroupTotalSizeLimit(limit int64) func(*Group) { 114 return func(g *Group) { 115 g.totalSizeLimit = limit 116 } 117 } 118 119 // OnStart implements service.Service by starting the goroutine that checks file 120 // and group limits. 121 func (g *Group) OnStart() error { 122 return nil 123 } 124 125 // OnStop implements service.Service by stopping the goroutine described above. 126 // NOTE: g.Head must be closed separately using Close. 127 func (g *Group) OnStop() { 128 if err := g.FlushAndSync(); err != nil { 129 g.Logger.Error( 130 fmt.Sprintf("unable to gracefully flush data, %s", err.Error()), 131 ) 132 } 133 } 134 135 // Wait blocks until all internal goroutines are finished. Supposed to be 136 // called after Stop. 137 func (g *Group) Wait() { 138 // Nothing to wait for. 139 } 140 141 // Close closes the head file. The group must be stopped by this moment. 142 func (g *Group) Close() { 143 if err := g.FlushAndSync(); err != nil { 144 g.Logger.Error( 145 fmt.Sprintf("unable to gracefully flush data, %s", err.Error()), 146 ) 147 } 148 149 g.mtx.Lock() 150 defer g.mtx.Unlock() 151 152 if err := g.Head.Close(); err != nil { 153 g.Logger.Error( 154 fmt.Sprintf("unable to gracefully close group head, %s", err.Error()), 155 ) 156 } 157 } 158 159 // HeadSizeLimit returns the current head size limit. 160 func (g *Group) HeadSizeLimit() int64 { 161 g.mtx.Lock() 162 defer g.mtx.Unlock() 163 return g.headSizeLimit 164 } 165 166 // TotalSizeLimit returns total size limit of the group. 167 func (g *Group) TotalSizeLimit() int64 { 168 g.mtx.Lock() 169 defer g.mtx.Unlock() 170 return g.totalSizeLimit 171 } 172 173 // MaxIndex returns index of the last file in the group. 174 func (g *Group) MaxIndex() int { 175 g.mtx.Lock() 176 defer g.mtx.Unlock() 177 return g.info.MaxIndex 178 } 179 180 // MinIndex returns index of the first file in the group. 181 func (g *Group) MinIndex() int { 182 g.mtx.Lock() 183 defer g.mtx.Unlock() 184 return g.info.MinIndex 185 } 186 187 func (g *Group) TotalSize() int64 { 188 g.mtx.Lock() 189 defer g.mtx.Unlock() 190 return g.info.TotalSize 191 } 192 193 func (g *Group) HeadSize() int64 { 194 g.mtx.Lock() 195 defer g.mtx.Unlock() 196 return g.info.HeadSize 197 } 198 199 // Write writes the contents of p into the current head of the group. It 200 // returns the number of bytes written. If nn < len(p), it also returns an 201 // error explaining why the write is short. 202 // NOTE: Writes are buffered so they don't write synchronously 203 // TODO: Make it halt if space is unavailable 204 func (g *Group) Write(p []byte) (nn int, err error) { 205 g.mtx.Lock() 206 defer g.mtx.Unlock() 207 nn, err = g.headBuf.Write(p) 208 209 // Update limits 210 g.info.TotalSize += int64(nn) 211 g.info.HeadSize += int64(nn) 212 213 // Maybe rotate 214 if err == nil && 0 < g.headSizeLimit && g.headSizeLimit <= g.info.HeadSize { 215 g.rotateFile() 216 } 217 return 218 } 219 220 // WriteLine writes line into the current head of the group. It also appends "\n". 221 // NOTE: Writes are buffered so they don't write synchronously 222 // TODO: Make it halt if space is unavailable 223 func (g *Group) WriteLine(line string) error { 224 g.mtx.Lock() 225 defer g.mtx.Unlock() 226 nn, err := g.headBuf.Write([]byte(line + "\n")) 227 228 // Update limits 229 g.info.TotalSize += int64(nn) 230 g.info.HeadSize += int64(nn) 231 232 // Maybe rotate 233 if err == nil && 0 < g.headSizeLimit && g.headSizeLimit <= g.info.HeadSize { 234 g.rotateFile() 235 } 236 return err 237 } 238 239 // Buffered returns the size of the currently buffered data. 240 func (g *Group) Buffered() int { 241 g.mtx.Lock() 242 defer g.mtx.Unlock() 243 return g.headBuf.Buffered() 244 } 245 246 // FlushAndSync writes any buffered data to the underlying file and commits the 247 // current content of the file to stable storage (fsync). 248 func (g *Group) FlushAndSync() error { 249 g.mtx.Lock() 250 defer g.mtx.Unlock() 251 err := g.headBuf.Flush() 252 if err == nil { 253 err = g.Head.Sync() 254 } 255 return err 256 } 257 258 func (g *Group) ensureTotalSizeLimit() { 259 limit := g.totalSizeLimit 260 if limit == 0 { 261 return 262 } 263 264 for i := 0; i < maxFilesToRemove; i++ { 265 index := g.info.MinIndex + i 266 if g.info.TotalSize < limit { 267 return 268 } 269 if index == g.info.MaxIndex { 270 // Special degenerate case, just do nothing. 271 // group's head may grow without bound. 272 // TODO: an occasional warning? 273 return 274 } 275 pathToRemove := filePathForIndex(g.Head.Path, index, g.info.MaxIndex) 276 fInfo, err := os.Stat(pathToRemove) 277 if err != nil { 278 g.Logger.Error("Failed to fetch info for file", "file", pathToRemove) 279 g.info.MinIndex = index + 1 // bump MinIndex. 280 continue 281 } 282 err = os.Remove(pathToRemove) 283 if err != nil { 284 g.Logger.Error("Failed to remove path", "path", pathToRemove) 285 return 286 } 287 g.info.MinIndex = index + 1 // bump MinIndex. 288 g.info.TotalSize -= fInfo.Size() 289 } 290 } 291 292 // RotateFile causes group to close the current head and assign it some index. 293 // After rotation, the earliest chunk may be removed if total size > totalSizeLimit. 294 // Note it does not create a new head. 295 func (g *Group) RotateFile() { 296 g.mtx.Lock() 297 defer g.mtx.Unlock() 298 g.rotateFile() 299 } 300 301 func (g *Group) rotateFile() { 302 headPath := g.Head.Path 303 304 if err := g.headBuf.Flush(); err != nil { 305 panic(err) 306 } 307 308 if err := g.Head.Sync(); err != nil { 309 panic(err) 310 } 311 312 if err := g.Head.closeFile(); err != nil { 313 panic(err) 314 } 315 316 indexPath := filePathForIndex(headPath, g.info.MaxIndex, g.info.MaxIndex+1) 317 if err := os.Rename(headPath, indexPath); err != nil { 318 panic(err) 319 } 320 321 g.info.HeadSize = 0 322 g.info.MaxIndex++ 323 324 g.ensureTotalSizeLimit() 325 } 326 327 // NewReader returns a new group reader. 328 // If endIndex != 0, reads until endIndex exclusive. 329 // CONTRACT: Caller must close the returned GroupReader. 330 func (g *Group) NewReader(startIndex int, endIndex int) (*GroupReader, error) { 331 r := newGroupReader(g, startIndex, endIndex) 332 return r, nil 333 } 334 335 // GroupInfo holds information about the group. 336 type GroupInfo struct { 337 MinIndex int // index of the first file in the group, including head 338 MaxIndex int // index of the last file in the group, including head 339 TotalSize int64 // total size of the group 340 HeadSize int64 // size of the head 341 } 342 343 // Returns info after scanning all files in g.Head's dir. 344 func (g *Group) ReadGroupInfo() GroupInfo { 345 g.mtx.Lock() 346 defer g.mtx.Unlock() 347 return g.readGroupInfo() 348 } 349 350 var indexedFilePattern = regexp.MustCompile(`^.+\.([0-9]{3,})$`) 351 352 // Index includes the head. 353 // CONTRACT: caller should have called g.mtx.Lock 354 func (g *Group) readGroupInfo() GroupInfo { 355 groupDir := filepath.Dir(g.Head.Path) 356 headBase := filepath.Base(g.Head.Path) 357 var minIndex, maxIndex int = -1, -1 358 var totalSize, headSize int64 = 0, 0 359 360 dir, err := os.Open(groupDir) 361 if err != nil { 362 panic(err) 363 } 364 defer dir.Close() 365 fiz, err := dir.Readdir(0) 366 if err != nil { 367 panic(err) 368 } 369 370 // For each file in the directory, filter by pattern 371 for _, fileInfo := range fiz { 372 if fileInfo.Name() == headBase { 373 fileSize := fileInfo.Size() 374 totalSize += fileSize 375 headSize = fileSize 376 continue 377 } else if strings.HasPrefix(fileInfo.Name(), headBase) { 378 fileSize := fileInfo.Size() 379 totalSize += fileSize 380 submatch := indexedFilePattern.FindSubmatch([]byte(fileInfo.Name())) 381 if len(submatch) != 0 { 382 // Matches 383 fileIndex, err := strconv.Atoi(string(submatch[1])) 384 if err != nil { 385 panic(err) 386 } 387 if maxIndex < fileIndex { 388 maxIndex = fileIndex 389 } 390 if minIndex == -1 || fileIndex < minIndex { 391 minIndex = fileIndex 392 } 393 } 394 } 395 } 396 397 // TODO ensure that all files are present between min and max. 398 399 // Now account for the head. 400 if minIndex == -1 { 401 // If there were no numbered files, 402 // then the head is index 0. 403 minIndex, maxIndex = 0, 0 404 } else { 405 // Otherwise, the head file is 1 greater 406 maxIndex++ 407 } 408 return GroupInfo{minIndex, maxIndex, totalSize, headSize} 409 } 410 411 func filePathForIndex(headPath string, index int, maxIndex int) string { 412 if index == maxIndex { 413 return headPath 414 } 415 return fmt.Sprintf("%v.%03d", headPath, index) 416 } 417 418 // -------------------------------------------------------------------------------- 419 420 // GroupReader provides an interface for reading from a Group. 421 type GroupReader struct { 422 *Group 423 mtx sync.Mutex 424 startIndex int 425 endIndex int 426 curIndex int 427 curFile *os.File 428 curReader *bufio.Reader 429 curLine []byte 430 } 431 432 func newGroupReader(g *Group, startIndex int, endIndex int) *GroupReader { 433 gr := &GroupReader{ 434 Group: g, 435 startIndex: startIndex, 436 endIndex: endIndex, 437 curIndex: 0, 438 curFile: nil, 439 curReader: nil, 440 curLine: nil, 441 } 442 gr.openFile(startIndex) 443 return gr 444 } 445 446 // Close closes the GroupReader by closing the cursor file. 447 func (gr *GroupReader) Close() error { 448 gr.mtx.Lock() 449 defer gr.mtx.Unlock() 450 451 if gr.curReader != nil { 452 err := gr.curFile.Close() 453 gr.curIndex = 0 454 gr.curReader = nil 455 gr.curFile = nil 456 gr.curLine = nil 457 return err 458 } 459 return nil 460 } 461 462 // Read implements io.Reader, reading bytes from the current Reader 463 // incrementing index until enough bytes are read. 464 func (gr *GroupReader) Read(p []byte) (n int, err error) { 465 lenP := len(p) 466 if lenP == 0 { 467 return 0, errors.New("given empty slice") 468 } 469 470 gr.mtx.Lock() 471 defer gr.mtx.Unlock() 472 473 // Open file if not open yet 474 if gr.curReader == nil { 475 if err = gr.openFile(gr.curIndex); err != nil { 476 return 0, err 477 } 478 } 479 480 // Iterate over files until enough bytes are read 481 var nn int 482 for { 483 nn, err = gr.curReader.Read(p[n:]) 484 n += nn 485 switch { 486 case errors.Is(err, io.EOF): 487 if n >= lenP { 488 return n, nil 489 } 490 // Open the next file 491 if err1 := gr.openFile(gr.curIndex + 1); err1 != nil { 492 return n, err1 493 } 494 case err != nil: 495 return n, err 496 case nn == 0: // empty file 497 return n, err 498 } 499 } 500 } 501 502 // IF index > gr.Group.maxIndex, returns io.EOF 503 // CONTRACT: caller should hold gr.mtx 504 func (gr *GroupReader) openFile(index int) error { 505 // Lock on Group to ensure that head doesn't move in the meanwhile. 506 gr.Group.mtx.Lock() 507 defer gr.Group.mtx.Unlock() 508 509 if gr.Group.info.MaxIndex < index { 510 return io.EOF 511 } 512 if gr.endIndex != 0 && gr.endIndex <= index { 513 return io.EOF 514 } 515 516 curFilePath := filePathForIndex(gr.Head.Path, index, gr.Group.info.MaxIndex) 517 curFile, err := os.OpenFile(curFilePath, os.O_RDONLY|os.O_CREATE, autoFilePerms) 518 if err != nil { 519 return err 520 } 521 curReader := bufio.NewReader(curFile) 522 523 // Update gr.cur* 524 if gr.curFile != nil { 525 gr.curFile.Close() // TODO return error? 526 } 527 gr.curIndex = index 528 gr.curFile = curFile 529 gr.curReader = curReader 530 gr.curLine = nil 531 return nil 532 } 533 534 // CurIndex returns cursor's file index. 535 func (gr *GroupReader) CurIndex() int { 536 gr.mtx.Lock() 537 defer gr.mtx.Unlock() 538 return gr.curIndex 539 }