github.com/matrixorigin/matrixone@v1.2.0/pkg/fileservice/disk_cache.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package fileservice 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 "io" 22 "io/fs" 23 "os" 24 "path/filepath" 25 "strings" 26 "sync" 27 "syscall" 28 29 "github.com/cespare/xxhash/v2" 30 "github.com/matrixorigin/matrixone/pkg/fileservice/fifocache" 31 "github.com/matrixorigin/matrixone/pkg/logutil" 32 "github.com/matrixorigin/matrixone/pkg/perfcounter" 33 metric "github.com/matrixorigin/matrixone/pkg/util/metric/v2" 34 "go.uber.org/zap" 35 ) 36 37 type DiskCache struct { 38 path string 39 perfCounterSets []*perfcounter.CounterSet 40 41 updatingPaths struct { 42 *sync.Cond 43 m map[string]bool 44 } 45 46 cache *fifocache.Cache[string, struct{}] 47 } 48 49 func NewDiskCache( 50 ctx context.Context, 51 path string, 52 capacity int, 53 perfCounterSets []*perfcounter.CounterSet, 54 ) (ret *DiskCache, err error) { 55 56 err = os.MkdirAll(path, 0755) 57 if err != nil { 58 return nil, err 59 } 60 61 ret = &DiskCache{ 62 path: path, 63 perfCounterSets: perfCounterSets, 64 65 cache: fifocache.New( 66 capacity, 67 func(path string, _ struct{}) { 68 err := os.Remove(path) 69 if err == nil { 70 perfcounter.Update(ctx, func(set *perfcounter.CounterSet) { 71 set.FileService.Cache.Disk.Evict.Add(1) 72 }, perfCounterSets...) 73 } 74 }, 75 func(key string) uint8 { 76 return uint8(xxhash.Sum64String(key)) 77 }, 78 ), 79 } 80 ret.updatingPaths.Cond = sync.NewCond(new(sync.Mutex)) 81 ret.updatingPaths.m = make(map[string]bool) 82 83 ret.loadCache() 84 85 return ret, nil 86 } 87 88 func (d *DiskCache) loadCache() { 89 90 _ = filepath.WalkDir(d.path, func(path string, entry os.DirEntry, err error) error { 91 if err != nil { 92 return nil //ignore 93 } 94 if entry.IsDir() { 95 // try remove if empty. for cleaning old structure 96 if path != d.path { 97 os.Remove(path) 98 } 99 return nil 100 } 101 if !strings.HasSuffix(entry.Name(), cacheFileSuffix) { 102 return nil 103 } 104 info, err := entry.Info() 105 if err != nil { 106 return nil // ignore 107 } 108 109 d.cache.Set(path, struct{}{}, int(fileSize(info))) 110 111 return nil 112 }) 113 114 } 115 116 var _ IOVectorCache = new(DiskCache) 117 118 func (d *DiskCache) Read( 119 ctx context.Context, 120 vector *IOVector, 121 ) ( 122 err error, 123 ) { 124 125 if vector.Policy.Any(SkipDiskCacheReads) { 126 return nil 127 } 128 129 var numHit, numRead, numOpenIOEntry, numOpenFull, numError int64 130 defer func() { 131 metric.FSReadHitDiskCounter.Add(float64(numHit)) 132 perfcounter.Update(ctx, func(c *perfcounter.CounterSet) { 133 c.FileService.Cache.Read.Add(numRead) 134 c.FileService.Cache.Hit.Add(numHit) 135 c.FileService.Cache.Disk.Read.Add(numRead) 136 c.FileService.Cache.Disk.Hit.Add(numHit) 137 c.FileService.Cache.Disk.Error.Add(numError) 138 c.FileService.Cache.Disk.OpenIOEntryFile.Add(numOpenIOEntry) 139 c.FileService.Cache.Disk.OpenFullFile.Add(numOpenFull) 140 }, d.perfCounterSets...) 141 }() 142 143 path, err := ParsePath(vector.FilePath) 144 if err != nil { 145 return err 146 } 147 148 openedFiles := make(map[string]*os.File) 149 defer func() { 150 for _, file := range openedFiles { 151 _ = file.Close() 152 } 153 }() 154 155 fillEntry := func(entry *IOEntry) error { 156 if entry.done { 157 return nil 158 } 159 if entry.Size < 0 { 160 // ignore size unknown entry 161 return nil 162 } 163 164 numRead++ 165 166 var file *os.File 167 168 // entry file 169 diskPath := d.pathForIOEntry(path.File, *entry) 170 if f, ok := openedFiles[diskPath]; ok { 171 // use opened file 172 _, err = file.Seek(entry.Offset, io.SeekStart) 173 if err == nil { 174 file = f 175 } 176 } else { 177 // open file 178 d.waitUpdateComplete(diskPath) 179 diskFile, err := os.Open(diskPath) 180 if err == nil { 181 file = diskFile 182 defer func() { 183 openedFiles[diskPath] = diskFile 184 }() 185 numOpenIOEntry++ 186 } 187 } 188 189 if file == nil { 190 // try full file 191 diskPath = d.pathForFile(path.File) 192 if f, ok := openedFiles[diskPath]; ok { 193 // use opened file 194 _, err = f.Seek(entry.Offset, io.SeekStart) 195 if err == nil { 196 file = f 197 } 198 } else { 199 // open file 200 d.waitUpdateComplete(diskPath) 201 diskFile, err := os.Open(diskPath) 202 if err == nil { 203 defer func() { 204 openedFiles[diskPath] = diskFile 205 }() 206 numOpenFull++ 207 // seek 208 _, err = diskFile.Seek(entry.Offset, io.SeekStart) 209 if err == nil { 210 file = diskFile 211 } 212 } 213 } 214 } 215 216 if file == nil { 217 // no file available 218 return nil 219 } 220 221 if _, ok := d.cache.Get(diskPath); !ok { 222 // set cache 223 stat, err := file.Stat() 224 if err != nil { 225 return err 226 } 227 d.cache.Set(diskPath, struct{}{}, int(fileSize(stat))) 228 } 229 230 if err := entry.ReadFromOSFile(file); err != nil { 231 // ignore error 232 numError++ 233 logutil.Warn("read disk cache error", zap.Any("error", err)) 234 return nil 235 } 236 237 entry.done = true 238 entry.fromCache = d 239 numHit++ 240 241 return nil 242 } 243 244 for i := range vector.Entries { 245 if err := fillEntry(&vector.Entries[i]); err != nil { 246 return err 247 } 248 } 249 250 return nil 251 } 252 253 func (d *DiskCache) Update( 254 ctx context.Context, 255 vector *IOVector, 256 async bool, 257 ) ( 258 err error, 259 ) { 260 261 if vector.Policy.Any(SkipDiskCacheWrites) { 262 return nil 263 } 264 265 path, err := ParsePath(vector.FilePath) 266 if err != nil { 267 return err 268 } 269 270 // callback 271 var onWritten []OnDiskCacheWrittenFunc 272 if v := ctx.Value(CtxKeyDiskCacheCallbacks); v != nil { 273 onWritten = v.(*DiskCacheCallbacks).OnWritten 274 } 275 276 for _, entry := range vector.Entries { 277 if len(entry.Data) == 0 { 278 // no data 279 continue 280 } 281 if entry.Size < 0 { 282 // ignore size unknown entry 283 continue 284 } 285 if entry.fromCache == d { 286 // no need to update 287 continue 288 } 289 290 diskPath := d.pathForIOEntry(path.File, entry) 291 written, err := d.writeFile(ctx, diskPath, func(context.Context) (io.ReadCloser, error) { 292 return io.NopCloser(bytes.NewReader(entry.Data)), nil 293 }) 294 if err != nil { 295 return err 296 } 297 if written { 298 for _, fn := range onWritten { 299 fn(vector.FilePath, entry) 300 } 301 } 302 303 } 304 305 return nil 306 } 307 308 func (d *DiskCache) writeFile( 309 ctx context.Context, 310 diskPath string, 311 openReader func(context.Context) (io.ReadCloser, error), 312 ) (bool, error) { 313 var numCreate, numStat, numError, numWrite int64 314 defer func() { 315 perfcounter.Update(ctx, func(set *perfcounter.CounterSet) { 316 set.FileService.Cache.Disk.CreateFile.Add(numCreate) 317 set.FileService.Cache.Disk.StatFile.Add(numStat) 318 set.FileService.Cache.Disk.WriteFile.Add(numWrite) 319 set.FileService.Cache.Disk.Error.Add(numError) 320 }) 321 }() 322 323 doneUpdate := d.startUpdate(diskPath) 324 defer doneUpdate() 325 326 if _, ok := d.cache.Get(diskPath); ok { 327 // already exists 328 return false, nil 329 } 330 stat, err := os.Stat(diskPath) 331 if err == nil { 332 // file exists 333 d.cache.Set(diskPath, struct{}{}, int(fileSize(stat))) 334 numStat++ 335 return false, nil 336 } 337 338 // write data 339 dir := filepath.Dir(diskPath) 340 err = os.MkdirAll(dir, 0755) 341 if err != nil { 342 numError++ 343 logutil.Warn("write disk cache error", zap.Any("error", err)) 344 return false, nil // ignore error 345 } 346 f, err := os.CreateTemp(dir, "*") 347 if err != nil { 348 numError++ 349 logutil.Warn("write disk cache error", zap.Any("error", err)) 350 return false, nil // ignore error 351 } 352 numCreate++ 353 from, err := openReader(ctx) 354 if err != nil { 355 numError++ 356 logutil.Warn("write disk cache error", zap.Any("error", err)) 357 return false, nil // ignore error 358 } 359 defer from.Close() 360 var buf []byte 361 put := ioBufferPool.Get(&buf) 362 defer put.Put() 363 _, err = io.CopyBuffer(f, from, buf) 364 if err != nil { 365 f.Close() 366 os.Remove(f.Name()) 367 numError++ 368 logutil.Warn("write disk cache error", zap.Any("error", err)) 369 return false, nil // ignore error 370 } 371 372 if err := f.Sync(); err != nil { 373 numError++ 374 logutil.Warn("write disk cache error", zap.Any("error", err)) 375 return false, nil // ignore error 376 } 377 378 // set cache 379 stat, err = f.Stat() 380 if err != nil { 381 numError++ 382 logutil.Warn("write disk cache error", zap.Any("error", err)) 383 return false, nil // ignore error 384 } 385 d.cache.Set(diskPath, struct{}{}, int(fileSize(stat))) 386 387 if err := f.Close(); err != nil { 388 numError++ 389 logutil.Warn("write disk cache error", zap.Any("error", err)) 390 return false, nil // ignore error 391 } 392 if err := os.Rename(f.Name(), diskPath); err != nil { 393 numError++ 394 logutil.Warn("write disk cache error", zap.Any("error", err)) 395 return false, nil // ignore error 396 } 397 logutil.Debug("disk cache file written", 398 zap.Any("path", diskPath), 399 ) 400 401 numWrite++ 402 403 return true, nil 404 } 405 406 func (d *DiskCache) Flush() { 407 } 408 409 const cacheFileSuffix = ".mofscache" 410 411 func (d *DiskCache) pathForIOEntry(path string, entry IOEntry) string { 412 if entry.Size < 0 { 413 panic("should not cache size -1 entry") 414 } 415 return filepath.Join( 416 d.path, 417 fmt.Sprintf("%d-%d%s%s", entry.Offset, entry.Size, toOSPath(path), cacheFileSuffix), 418 ) 419 } 420 421 func (d *DiskCache) pathForFile(path string) string { 422 return filepath.Join( 423 d.path, 424 fmt.Sprintf("full%s%s", toOSPath(path), cacheFileSuffix), 425 ) 426 } 427 428 var ErrNotCacheFile = errorStr("not a cache file") 429 430 func (d *DiskCache) decodeFilePath(diskPath string) (string, error) { 431 path, err := filepath.Rel(d.path, diskPath) 432 if err != nil { 433 return "", err 434 } 435 if !strings.HasPrefix(path, "full") { 436 return "", ErrNotCacheFile 437 } 438 path = strings.TrimPrefix(path, "full") 439 path = strings.TrimSuffix(path, cacheFileSuffix) 440 return fromOSPath(path), nil 441 } 442 443 func (d *DiskCache) waitUpdateComplete(path string) { 444 d.updatingPaths.L.Lock() 445 for d.updatingPaths.m[path] { 446 d.updatingPaths.Wait() 447 } 448 d.updatingPaths.L.Unlock() 449 } 450 451 func (d *DiskCache) startUpdate(path string) (done func()) { 452 d.updatingPaths.L.Lock() 453 for d.updatingPaths.m[path] { 454 d.updatingPaths.Wait() 455 } 456 d.updatingPaths.m[path] = true 457 d.updatingPaths.L.Unlock() 458 done = func() { 459 d.updatingPaths.L.Lock() 460 delete(d.updatingPaths.m, path) 461 d.updatingPaths.Broadcast() 462 d.updatingPaths.L.Unlock() 463 } 464 return 465 } 466 467 var _ FileCache = new(DiskCache) 468 469 func (d *DiskCache) SetFile( 470 ctx context.Context, 471 path string, 472 openReader func(context.Context) (io.ReadCloser, error), 473 ) error { 474 diskPath := d.pathForFile(path) 475 _, err := d.writeFile(ctx, diskPath, openReader) 476 if err != nil { 477 return err 478 } 479 return nil 480 } 481 482 func (d *DiskCache) DeletePaths( 483 ctx context.Context, 484 paths []string, 485 ) error { 486 487 for _, path := range paths { 488 diskPath := d.pathForFile(path) 489 //TODO also delete IOEntry files 490 491 doneUpdate := d.startUpdate(diskPath) 492 defer doneUpdate() 493 494 if err := os.Remove(diskPath); err != nil { 495 if !os.IsNotExist(err) { 496 return err 497 } 498 } 499 d.cache.Delete(diskPath) 500 } 501 502 return nil 503 } 504 505 func fileSize(info fs.FileInfo) int64 { 506 if sys, ok := info.Sys().(*syscall.Stat_t); ok { 507 return int64(sys.Blocks) * 512 // it's always 512, not sys.Blksize 508 } 509 return info.Size() 510 }