github.com/matrixorigin/matrixone@v1.2.0/pkg/fileservice/local_etl_fs.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package fileservice 16 17 import ( 18 "bytes" 19 "context" 20 "io" 21 "os" 22 pathpkg "path" 23 "path/filepath" 24 "sort" 25 "strings" 26 "sync" 27 "sync/atomic" 28 29 "github.com/matrixorigin/matrixone/pkg/common/moerr" 30 ) 31 32 // LocalETLFS is a FileService implementation backed by local file system and suitable for ETL operations 33 type LocalETLFS struct { 34 name string 35 rootPath string 36 37 sync.RWMutex 38 dirFiles map[string]*os.File 39 } 40 41 var _ FileService = new(LocalETLFS) 42 43 func NewLocalETLFS(name string, rootPath string) (*LocalETLFS, error) { 44 45 // get absolute path 46 if rootPath != "" { 47 var err error 48 rootPath, err = filepath.Abs(rootPath) 49 if err != nil { 50 return nil, err 51 } 52 53 // ensure dir 54 f, err := os.Open(rootPath) 55 if os.IsNotExist(err) { 56 // not exists, create 57 err := os.MkdirAll(rootPath, 0755) 58 if err != nil { 59 return nil, err 60 } 61 62 } else if err != nil { 63 // stat error 64 return nil, err 65 66 } else { 67 defer f.Close() 68 } 69 70 } 71 72 return &LocalETLFS{ 73 name: name, 74 rootPath: rootPath, 75 dirFiles: make(map[string]*os.File), 76 }, nil 77 } 78 79 func (l *LocalETLFS) Name() string { 80 return l.name 81 } 82 83 func (l *LocalETLFS) Close() { 84 } 85 86 func (l *LocalETLFS) Write(ctx context.Context, vector IOVector) error { 87 select { 88 case <-ctx.Done(): 89 return ctx.Err() 90 default: 91 } 92 93 path, err := ParsePathAtService(vector.FilePath, l.name) 94 if err != nil { 95 return err 96 } 97 nativePath := l.toNativeFilePath(path.File) 98 99 // check existence 100 _, err = os.Stat(nativePath) 101 if err == nil { 102 // existed 103 return moerr.NewFileAlreadyExistsNoCtx(path.File) 104 } 105 106 return l.write(ctx, vector) 107 } 108 109 func (l *LocalETLFS) write(ctx context.Context, vector IOVector) error { 110 path, err := ParsePathAtService(vector.FilePath, l.name) 111 if err != nil { 112 return err 113 } 114 nativePath := l.toNativeFilePath(path.File) 115 116 // sort 117 sort.Slice(vector.Entries, func(i, j int) bool { 118 return vector.Entries[i].Offset < vector.Entries[j].Offset 119 }) 120 121 // size 122 var size int64 123 if len(vector.Entries) > 0 { 124 last := vector.Entries[len(vector.Entries)-1] 125 size = int64(last.Offset + last.Size) 126 } 127 128 r := newIOEntriesReader(ctx, vector.Entries) 129 130 // write 131 f, err := os.CreateTemp( 132 l.rootPath, 133 ".tmp.*", 134 ) 135 if err != nil { 136 return err 137 } 138 var buf []byte 139 put := ioBufferPool.Get(&buf) 140 defer put.Put() 141 n, err := io.CopyBuffer(f, r, buf) 142 if err != nil { 143 return err 144 } 145 if n != size { 146 sizeUnknown := false 147 for _, entry := range vector.Entries { 148 if entry.Size < 0 { 149 sizeUnknown = true 150 break 151 } 152 } 153 if !sizeUnknown { 154 return moerr.NewSizeNotMatchNoCtx(path.File) 155 } 156 } 157 if err := f.Close(); err != nil { 158 return err 159 } 160 161 // ensure parent dir 162 parentDir, _ := filepath.Split(nativePath) 163 err = l.ensureDir(parentDir) 164 if err != nil { 165 return err 166 } 167 168 // move 169 if err := os.Rename(f.Name(), nativePath); err != nil { 170 return err 171 } 172 173 if err := l.syncDir(parentDir); err != nil { 174 return err 175 } 176 177 return nil 178 } 179 180 func (l *LocalETLFS) Read(ctx context.Context, vector *IOVector) error { 181 select { 182 case <-ctx.Done(): 183 return ctx.Err() 184 default: 185 } 186 187 if len(vector.Entries) == 0 { 188 return moerr.NewEmptyVectorNoCtx() 189 } 190 191 path, err := ParsePathAtService(vector.FilePath, l.name) 192 if err != nil { 193 return err 194 } 195 nativePath := l.toNativeFilePath(path.File) 196 197 _, err = os.Stat(nativePath) 198 if os.IsNotExist(err) { 199 return moerr.NewFileNotFoundNoCtx(path.File) 200 } 201 if err != nil { 202 return err 203 } 204 205 for i, entry := range vector.Entries { 206 if entry.Size == 0 { 207 return moerr.NewEmptyRangeNoCtx(path.File) 208 } 209 210 if entry.done { 211 continue 212 } 213 214 if entry.WriterForRead != nil { 215 f, err := os.Open(nativePath) 216 if os.IsNotExist(err) { 217 return moerr.NewFileNotFoundNoCtx(path.File) 218 } 219 if err != nil { 220 return err 221 } 222 defer f.Close() 223 if entry.Offset > 0 { 224 if _, err := f.Seek(int64(entry.Offset), io.SeekStart); err != nil { 225 return err 226 } 227 } 228 r := (io.Reader)(f) 229 if entry.Size > 0 { 230 r = io.LimitReader(r, int64(entry.Size)) 231 } 232 233 if entry.ToCacheData != nil { 234 r = io.TeeReader(r, entry.WriterForRead) 235 counter := new(atomic.Int64) 236 cr := &countingReader{ 237 R: r, 238 C: counter, 239 } 240 cacheData, err := entry.ToCacheData(cr, nil, DefaultCacheDataAllocator) 241 if err != nil { 242 return err 243 } 244 vector.Entries[i].CachedData = cacheData 245 if entry.Size > 0 && counter.Load() != entry.Size { 246 return moerr.NewUnexpectedEOFNoCtx(path.File) 247 } 248 249 } else { 250 var buf []byte 251 put := ioBufferPool.Get(&buf) 252 defer put.Put() 253 n, err := io.CopyBuffer(entry.WriterForRead, r, buf) 254 if err != nil { 255 return err 256 } 257 if entry.Size > 0 && n != int64(entry.Size) { 258 return moerr.NewUnexpectedEOFNoCtx(path.File) 259 } 260 } 261 262 } else if entry.ReadCloserForRead != nil { 263 f, err := os.Open(nativePath) 264 if os.IsNotExist(err) { 265 return moerr.NewFileNotFoundNoCtx(path.File) 266 } 267 if err != nil { 268 return err 269 } 270 if entry.Offset > 0 { 271 if _, err := f.Seek(int64(entry.Offset), io.SeekStart); err != nil { 272 return err 273 } 274 } 275 r := (io.Reader)(f) 276 if entry.Size > 0 { 277 r = io.LimitReader(r, int64(entry.Size)) 278 } 279 if entry.ToCacheData == nil { 280 *entry.ReadCloserForRead = &readCloser{ 281 r: r, 282 closeFunc: f.Close, 283 } 284 } else { 285 buf := new(bytes.Buffer) 286 *entry.ReadCloserForRead = &readCloser{ 287 r: io.TeeReader(r, buf), 288 closeFunc: func() error { 289 defer f.Close() 290 cacheData, err := entry.ToCacheData(buf, buf.Bytes(), DefaultCacheDataAllocator) 291 if err != nil { 292 return err 293 } 294 vector.Entries[i].CachedData = cacheData 295 return nil 296 }, 297 } 298 } 299 300 } else { 301 f, err := os.Open(nativePath) 302 if os.IsNotExist(err) { 303 return moerr.NewFileNotFoundNoCtx(path.File) 304 } 305 if err != nil { 306 return err 307 } 308 defer f.Close() 309 310 if entry.Offset > 0 { 311 _, err = f.Seek(int64(entry.Offset), io.SeekStart) 312 if err != nil { 313 return err 314 } 315 } 316 r := (io.Reader)(f) 317 if entry.Size > 0 { 318 r = io.LimitReader(r, int64(entry.Size)) 319 } 320 321 if entry.Size < 0 { 322 data, err := io.ReadAll(r) 323 if err != nil { 324 return err 325 } 326 entry.Data = data 327 entry.Size = int64(len(data)) 328 329 } else { 330 if int64(len(entry.Data)) < entry.Size { 331 entry.Data = make([]byte, entry.Size) 332 } 333 n, err := io.ReadFull(r, entry.Data) 334 if err != nil { 335 return err 336 } 337 if int64(n) != entry.Size { 338 return moerr.NewUnexpectedEOFNoCtx(path.File) 339 } 340 } 341 342 if err := entry.setCachedData(); err != nil { 343 return err 344 } 345 346 vector.Entries[i] = entry 347 } 348 349 } 350 351 return nil 352 353 } 354 355 func (l *LocalETLFS) ReadCache(ctx context.Context, vector *IOVector) error { 356 return nil 357 } 358 359 func (l *LocalETLFS) StatFile(ctx context.Context, filePath string) (*DirEntry, error) { 360 select { 361 case <-ctx.Done(): 362 return nil, ctx.Err() 363 default: 364 } 365 366 path, err := ParsePathAtService(filePath, l.name) 367 if err != nil { 368 return nil, err 369 } 370 nativePath := l.toNativeFilePath(path.File) 371 372 stat, err := os.Stat(nativePath) 373 if os.IsNotExist(err) { 374 return nil, moerr.NewFileNotFoundNoCtx(path.File) 375 } 376 if err != nil { 377 return nil, err 378 } 379 380 if stat.IsDir() { 381 return nil, moerr.NewFileNotFoundNoCtx(path.File) 382 } 383 384 return &DirEntry{ 385 Name: pathpkg.Base(filePath), 386 IsDir: false, 387 Size: stat.Size(), 388 }, nil 389 } 390 391 func (l *LocalETLFS) PrefetchFile(ctx context.Context, filePath string) error { 392 return nil 393 } 394 395 func (l *LocalETLFS) List(ctx context.Context, dirPath string) (ret []DirEntry, err error) { 396 select { 397 case <-ctx.Done(): 398 return nil, ctx.Err() 399 default: 400 } 401 402 path, err := ParsePathAtService(dirPath, l.name) 403 if err != nil { 404 return nil, err 405 } 406 nativePath := l.toNativeFilePath(path.File) 407 408 f, err := os.Open(nativePath) 409 if os.IsNotExist(err) { 410 err = nil 411 return 412 } 413 if err != nil { 414 return nil, err 415 } 416 defer f.Close() 417 418 entries, err := f.ReadDir(-1) 419 for _, entry := range entries { 420 name := entry.Name() 421 if strings.HasPrefix(name, ".") { 422 continue 423 } 424 info, err := entry.Info() 425 if err != nil { 426 return nil, err 427 } 428 isDir, err := entryIsDir(nativePath, name, info) 429 if err != nil { 430 return nil, err 431 } 432 ret = append(ret, DirEntry{ 433 Name: name, 434 IsDir: isDir, 435 Size: info.Size(), 436 }) 437 } 438 439 sort.Slice(ret, func(i, j int) bool { 440 return ret[i].Name < ret[j].Name 441 }) 442 443 if err != nil { 444 return ret, err 445 } 446 447 return 448 } 449 450 func (l *LocalETLFS) Delete(ctx context.Context, filePaths ...string) error { 451 select { 452 case <-ctx.Done(): 453 return ctx.Err() 454 default: 455 } 456 457 for _, filePath := range filePaths { 458 if err := l.deleteSingle(ctx, filePath); err != nil { 459 return err 460 } 461 } 462 return nil 463 } 464 465 func (l *LocalETLFS) deleteSingle(ctx context.Context, filePath string) error { 466 path, err := ParsePathAtService(filePath, l.name) 467 if err != nil { 468 return err 469 } 470 nativePath := l.toNativeFilePath(path.File) 471 472 _, err = os.Stat(nativePath) 473 if err != nil { 474 if os.IsNotExist(err) { 475 // ignore not found error 476 return nil 477 } 478 return err 479 } 480 481 err = os.Remove(nativePath) 482 if err != nil { 483 return err 484 } 485 486 parentDir, _ := filepath.Split(nativePath) 487 err = l.syncDir(parentDir) 488 if err != nil { 489 return err 490 } 491 492 return nil 493 } 494 495 func (l *LocalETLFS) ensureDir(nativePath string) error { 496 nativePath = filepath.Clean(nativePath) 497 if nativePath == "" { 498 return nil 499 } 500 501 // check existence by l.dirFiles 502 l.RLock() 503 _, ok := l.dirFiles[nativePath] 504 if ok { 505 // dir existed 506 l.RUnlock() 507 return nil 508 } 509 l.RUnlock() 510 511 // check existence by fstat 512 _, err := os.Stat(nativePath) 513 if err == nil { 514 // existed 515 return nil 516 } 517 518 // ensure parent 519 parent, _ := filepath.Split(nativePath) 520 if parent != nativePath { 521 if err := l.ensureDir(parent); err != nil { 522 return err 523 } 524 } 525 526 // create 527 if err := os.Mkdir(nativePath, 0755); err != nil { 528 if os.IsExist(err) { 529 // existed 530 return nil 531 } 532 return err 533 } 534 535 // sync parent dir 536 if err := l.syncDir(parent); err != nil { 537 return err 538 } 539 540 return nil 541 } 542 543 func (l *LocalETLFS) toOSPath(filePath string) string { 544 if os.PathSeparator == '/' { 545 return filePath 546 } 547 return strings.ReplaceAll(filePath, "/", osPathSeparatorStr) 548 } 549 550 func (l *LocalETLFS) syncDir(nativePath string) error { 551 l.Lock() 552 f, ok := l.dirFiles[nativePath] 553 if !ok { 554 var err error 555 f, err = os.Open(nativePath) 556 if err != nil { 557 l.Unlock() 558 return err 559 } 560 l.dirFiles[nativePath] = f 561 } 562 l.Unlock() 563 if err := f.Sync(); err != nil { 564 return err 565 } 566 return nil 567 } 568 569 func (l *LocalETLFS) toNativeFilePath(filePath string) string { 570 return filepath.Join(l.rootPath, l.toOSPath(filePath)) 571 } 572 573 var _ ETLFileService = new(LocalETLFS) 574 575 func (l *LocalETLFS) ETLCompatible() {} 576 577 var _ MutableFileService = new(LocalETLFS) 578 579 func (l *LocalETLFS) NewMutator(ctx context.Context, filePath string) (Mutator, error) { 580 path, err := ParsePathAtService(filePath, l.name) 581 if err != nil { 582 return nil, err 583 } 584 nativePath := l.toNativeFilePath(path.File) 585 f, err := os.OpenFile(nativePath, os.O_RDWR, 0644) 586 if os.IsNotExist(err) { 587 return nil, moerr.NewFileNotFoundNoCtx(path.File) 588 } 589 return &LocalETLFSMutator{ 590 osFile: f, 591 }, nil 592 } 593 594 type LocalETLFSMutator struct { 595 osFile *os.File 596 } 597 598 func (l *LocalETLFSMutator) Mutate(ctx context.Context, entries ...IOEntry) error { 599 return l.mutate(ctx, 0, entries...) 600 } 601 602 func (l *LocalETLFSMutator) Append(ctx context.Context, entries ...IOEntry) error { 603 offset, err := l.osFile.Seek(0, io.SeekEnd) 604 if err != nil { 605 return err 606 } 607 return l.mutate(ctx, offset, entries...) 608 } 609 610 func (l *LocalETLFSMutator) mutate(ctx context.Context, baseOffset int64, entries ...IOEntry) error { 611 select { 612 case <-ctx.Done(): 613 return ctx.Err() 614 default: 615 } 616 617 // write 618 for _, entry := range entries { 619 620 if entry.ReaderForWrite != nil { 621 // seek and copy 622 _, err := l.osFile.Seek(int64(entry.Offset+baseOffset), 0) 623 if err != nil { 624 return err 625 } 626 var buf []byte 627 put := ioBufferPool.Get(&buf) 628 defer put.Put() 629 n, err := io.CopyBuffer(l.osFile, entry.ReaderForWrite, buf) 630 if err != nil { 631 return err 632 } 633 if n != entry.Size { 634 return moerr.NewSizeNotMatchNoCtx("") 635 } 636 637 } else { 638 // WriteAt 639 n, err := l.osFile.WriteAt(entry.Data, int64(entry.Offset+baseOffset)) 640 if err != nil { 641 return err 642 } 643 if int64(n) != entry.Size { 644 return moerr.NewSizeNotMatchNoCtx("") 645 } 646 } 647 648 } 649 650 return nil 651 } 652 653 func (l *LocalETLFSMutator) Close() error { 654 // sync 655 if err := l.osFile.Sync(); err != nil { 656 return err 657 } 658 659 // close 660 if err := l.osFile.Close(); err != nil { 661 return err 662 } 663 664 return nil 665 }