github.com/matrixorigin/matrixone@v0.7.0/pkg/fileservice/local_etl_fs.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package fileservice 16 17 import ( 18 "bytes" 19 "context" 20 "io" 21 "os" 22 pathpkg "path" 23 "path/filepath" 24 "sort" 25 "strings" 26 "sync" 27 28 "github.com/matrixorigin/matrixone/pkg/common/moerr" 29 ) 30 31 // LocalETLFS is a FileService implementation backed by local file system and suitable for ETL operations 32 type LocalETLFS struct { 33 name string 34 rootPath string 35 36 sync.RWMutex 37 dirFiles map[string]*os.File 38 39 createTempDirOnce sync.Once 40 } 41 42 var _ FileService = new(LocalETLFS) 43 44 func NewLocalETLFS(name string, rootPath string) (*LocalETLFS, error) { 45 return &LocalETLFS{ 46 name: name, 47 rootPath: rootPath, 48 dirFiles: make(map[string]*os.File), 49 }, nil 50 } 51 52 func (l *LocalETLFS) Name() string { 53 return l.name 54 } 55 56 func (l *LocalETLFS) ensureTempDir() (err error) { 57 l.createTempDirOnce.Do(func() { 58 err = os.MkdirAll(filepath.Join(l.rootPath, ".tmp"), 0755) 59 }) 60 return 61 } 62 63 func (l *LocalETLFS) Write(ctx context.Context, vector IOVector) error { 64 select { 65 case <-ctx.Done(): 66 return ctx.Err() 67 default: 68 } 69 70 path, err := ParsePathAtService(vector.FilePath, l.name) 71 if err != nil { 72 return err 73 } 74 nativePath := l.toNativeFilePath(path.File) 75 76 // check existence 77 _, err = os.Stat(nativePath) 78 if err == nil { 79 // existed 80 return moerr.NewFileAlreadyExistsNoCtx(path.File) 81 } 82 83 return l.write(ctx, vector) 84 } 85 86 func (l *LocalETLFS) write(ctx context.Context, vector IOVector) error { 87 path, err := ParsePathAtService(vector.FilePath, l.name) 88 if err != nil { 89 return err 90 } 91 nativePath := l.toNativeFilePath(path.File) 92 93 // sort 94 sort.Slice(vector.Entries, func(i, j int) bool { 95 return vector.Entries[i].Offset < vector.Entries[j].Offset 96 }) 97 98 // size 99 var size int64 100 if len(vector.Entries) > 0 { 101 last := vector.Entries[len(vector.Entries)-1] 102 size = int64(last.Offset + last.Size) 103 } 104 105 // write 106 if err := l.ensureTempDir(); err != nil { 107 return err 108 } 109 f, err := os.CreateTemp( 110 filepath.Join(l.rootPath, ".tmp"), 111 "*.tmp", 112 ) 113 if err != nil { 114 return err 115 } 116 n, err := io.Copy(f, newIOEntriesReader(ctx, vector.Entries)) 117 if err != nil { 118 return err 119 } 120 if n != size { 121 sizeUnknown := false 122 for _, entry := range vector.Entries { 123 if entry.Size < 0 { 124 sizeUnknown = true 125 break 126 } 127 } 128 if !sizeUnknown { 129 return moerr.NewSizeNotMatchNoCtx(path.File) 130 } 131 } 132 if err := f.Close(); err != nil { 133 return err 134 } 135 136 // ensure parent dir 137 parentDir, _ := filepath.Split(nativePath) 138 err = l.ensureDir(parentDir) 139 if err != nil { 140 return err 141 } 142 143 // move 144 if err := os.Rename(f.Name(), nativePath); err != nil { 145 return err 146 } 147 148 if err := l.syncDir(parentDir); err != nil { 149 return err 150 } 151 152 return nil 153 } 154 155 func (l *LocalETLFS) Read(ctx context.Context, vector *IOVector) error { 156 select { 157 case <-ctx.Done(): 158 return ctx.Err() 159 default: 160 } 161 162 if len(vector.Entries) == 0 { 163 return moerr.NewEmptyVectorNoCtx() 164 } 165 166 path, err := ParsePathAtService(vector.FilePath, l.name) 167 if err != nil { 168 return err 169 } 170 nativePath := l.toNativeFilePath(path.File) 171 172 _, err = os.Stat(nativePath) 173 if os.IsNotExist(err) { 174 return moerr.NewFileNotFoundNoCtx(path.File) 175 } 176 if err != nil { 177 return err 178 } 179 180 for i, entry := range vector.Entries { 181 if entry.Size == 0 { 182 return moerr.NewEmptyRangeNoCtx(path.File) 183 } 184 185 if entry.done { 186 continue 187 } 188 189 if entry.WriterForRead != nil { 190 f, err := os.Open(nativePath) 191 if os.IsNotExist(err) { 192 return moerr.NewFileNotFoundNoCtx(path.File) 193 } 194 if err != nil { 195 return err 196 } 197 defer f.Close() 198 if entry.Offset > 0 { 199 if _, err := f.Seek(int64(entry.Offset), io.SeekStart); err != nil { 200 return err 201 } 202 } 203 r := (io.Reader)(f) 204 if entry.Size > 0 { 205 r = io.LimitReader(r, int64(entry.Size)) 206 } 207 208 if entry.ToObject != nil { 209 r = io.TeeReader(r, entry.WriterForRead) 210 cr := &countingReader{ 211 R: r, 212 } 213 obj, size, err := entry.ToObject(cr, nil) 214 if err != nil { 215 return err 216 } 217 vector.Entries[i].Object = obj 218 vector.Entries[i].ObjectSize = size 219 if entry.Size > 0 && cr.N != entry.Size { 220 return moerr.NewUnexpectedEOFNoCtx(path.File) 221 } 222 223 } else { 224 n, err := io.Copy(entry.WriterForRead, r) 225 if err != nil { 226 return err 227 } 228 if entry.Size > 0 && n != int64(entry.Size) { 229 return moerr.NewUnexpectedEOFNoCtx(path.File) 230 } 231 } 232 233 } else if entry.ReadCloserForRead != nil { 234 f, err := os.Open(nativePath) 235 if os.IsNotExist(err) { 236 return moerr.NewFileNotFoundNoCtx(path.File) 237 } 238 if err != nil { 239 return err 240 } 241 if entry.Offset > 0 { 242 if _, err := f.Seek(int64(entry.Offset), io.SeekStart); err != nil { 243 return err 244 } 245 } 246 r := (io.Reader)(f) 247 if entry.Size > 0 { 248 r = io.LimitReader(r, int64(entry.Size)) 249 } 250 if entry.ToObject == nil { 251 *entry.ReadCloserForRead = &readCloser{ 252 r: r, 253 closeFunc: f.Close, 254 } 255 } else { 256 buf := new(bytes.Buffer) 257 *entry.ReadCloserForRead = &readCloser{ 258 r: io.TeeReader(r, buf), 259 closeFunc: func() error { 260 defer f.Close() 261 obj, size, err := entry.ToObject(buf, buf.Bytes()) 262 if err != nil { 263 return err 264 } 265 vector.Entries[i].Object = obj 266 vector.Entries[i].ObjectSize = size 267 return nil 268 }, 269 } 270 } 271 272 } else { 273 f, err := os.Open(nativePath) 274 if os.IsNotExist(err) { 275 return moerr.NewFileNotFoundNoCtx(path.File) 276 } 277 if err != nil { 278 return err 279 } 280 defer f.Close() 281 282 if entry.Offset > 0 { 283 _, err = f.Seek(int64(entry.Offset), io.SeekStart) 284 if err != nil { 285 return err 286 } 287 } 288 r := (io.Reader)(f) 289 if entry.Size > 0 { 290 r = io.LimitReader(r, int64(entry.Size)) 291 } 292 293 if entry.Size < 0 { 294 data, err := io.ReadAll(r) 295 if err != nil { 296 return err 297 } 298 entry.Data = data 299 entry.Size = int64(len(data)) 300 301 } else { 302 if int64(len(entry.Data)) < entry.Size { 303 entry.Data = make([]byte, entry.Size) 304 } 305 n, err := io.ReadFull(r, entry.Data) 306 if err != nil { 307 return err 308 } 309 if int64(n) != entry.Size { 310 return moerr.NewUnexpectedEOFNoCtx(path.File) 311 } 312 } 313 314 if err := entry.setObjectFromData(); err != nil { 315 return err 316 } 317 318 vector.Entries[i] = entry 319 } 320 321 } 322 323 return nil 324 325 } 326 327 func (l *LocalETLFS) StatFile(ctx context.Context, filePath string) (*DirEntry, error) { 328 select { 329 case <-ctx.Done(): 330 return nil, ctx.Err() 331 default: 332 } 333 334 path, err := ParsePathAtService(filePath, l.name) 335 if err != nil { 336 return nil, err 337 } 338 nativePath := l.toNativeFilePath(path.File) 339 340 stat, err := os.Stat(nativePath) 341 if os.IsNotExist(err) { 342 return nil, moerr.NewFileNotFoundNoCtx(path.File) 343 } 344 if err != nil { 345 return nil, err 346 } 347 348 if stat.IsDir() { 349 return nil, moerr.NewFileNotFoundNoCtx(path.File) 350 } 351 352 return &DirEntry{ 353 Name: pathpkg.Base(filePath), 354 IsDir: false, 355 Size: stat.Size(), 356 }, nil 357 } 358 359 func (l *LocalETLFS) List(ctx context.Context, dirPath string) (ret []DirEntry, err error) { 360 select { 361 case <-ctx.Done(): 362 return nil, ctx.Err() 363 default: 364 } 365 366 path, err := ParsePathAtService(dirPath, l.name) 367 if err != nil { 368 return nil, err 369 } 370 nativePath := l.toNativeFilePath(path.File) 371 372 f, err := os.Open(nativePath) 373 if os.IsNotExist(err) { 374 err = nil 375 return 376 } 377 if err != nil { 378 return nil, err 379 } 380 defer f.Close() 381 382 entries, err := f.ReadDir(-1) 383 for _, entry := range entries { 384 name := entry.Name() 385 if strings.HasPrefix(name, ".") { 386 continue 387 } 388 info, err := entry.Info() 389 if err != nil { 390 return nil, err 391 } 392 isDir, err := entryIsDir(nativePath, name, info) 393 if err != nil { 394 return nil, err 395 } 396 ret = append(ret, DirEntry{ 397 Name: name, 398 IsDir: isDir, 399 Size: info.Size(), 400 }) 401 } 402 403 sort.Slice(ret, func(i, j int) bool { 404 return ret[i].Name < ret[j].Name 405 }) 406 407 if err != nil { 408 return ret, err 409 } 410 411 return 412 } 413 414 func (l *LocalETLFS) Delete(ctx context.Context, filePaths ...string) error { 415 select { 416 case <-ctx.Done(): 417 return ctx.Err() 418 default: 419 } 420 421 for _, filePath := range filePaths { 422 if err := l.deleteSingle(ctx, filePath); err != nil { 423 return err 424 } 425 } 426 return nil 427 } 428 429 func (l *LocalETLFS) deleteSingle(ctx context.Context, filePath string) error { 430 path, err := ParsePathAtService(filePath, l.name) 431 if err != nil { 432 return err 433 } 434 nativePath := l.toNativeFilePath(path.File) 435 436 _, err = os.Stat(nativePath) 437 if os.IsNotExist(err) { 438 return moerr.NewFileNotFoundNoCtx(path.File) 439 } 440 if err != nil { 441 return err 442 } 443 444 err = os.Remove(nativePath) 445 if err != nil { 446 return err 447 } 448 449 parentDir, _ := filepath.Split(nativePath) 450 err = l.syncDir(parentDir) 451 if err != nil { 452 return err 453 } 454 455 return nil 456 } 457 458 func (l *LocalETLFS) ensureDir(nativePath string) error { 459 nativePath = filepath.Clean(nativePath) 460 if nativePath == "" { 461 return nil 462 } 463 464 // check existence by l.dirFiles 465 l.RLock() 466 _, ok := l.dirFiles[nativePath] 467 if ok { 468 // dir existed 469 l.RUnlock() 470 return nil 471 } 472 l.RUnlock() 473 474 // check existence by fstat 475 _, err := os.Stat(nativePath) 476 if err == nil { 477 // existed 478 return nil 479 } 480 481 // ensure parent 482 parent, _ := filepath.Split(nativePath) 483 if parent != nativePath { 484 if err := l.ensureDir(parent); err != nil { 485 return err 486 } 487 } 488 489 // create 490 if err := os.Mkdir(nativePath, 0755); err != nil { 491 return err 492 } 493 494 // sync parent dir 495 if err := l.syncDir(parent); err != nil { 496 return err 497 } 498 499 return nil 500 } 501 502 func (l *LocalETLFS) toOSPath(filePath string) string { 503 if os.PathSeparator == '/' { 504 return filePath 505 } 506 return strings.ReplaceAll(filePath, "/", osPathSeparatorStr) 507 } 508 509 func (l *LocalETLFS) syncDir(nativePath string) error { 510 l.Lock() 511 f, ok := l.dirFiles[nativePath] 512 if !ok { 513 var err error 514 f, err = os.Open(nativePath) 515 if err != nil { 516 l.Unlock() 517 return err 518 } 519 l.dirFiles[nativePath] = f 520 } 521 l.Unlock() 522 if err := f.Sync(); err != nil { 523 return err 524 } 525 return nil 526 } 527 528 func (l *LocalETLFS) toNativeFilePath(filePath string) string { 529 return filepath.Join(l.rootPath, l.toOSPath(filePath)) 530 } 531 532 var _ ETLFileService = new(LocalETLFS) 533 534 func (l *LocalETLFS) ETLCompatible() {} 535 536 var _ MutableFileService = new(LocalETLFS) 537 538 func (l *LocalETLFS) NewMutator(filePath string) (Mutator, error) { 539 path, err := ParsePathAtService(filePath, l.name) 540 if err != nil { 541 return nil, err 542 } 543 nativePath := l.toNativeFilePath(path.File) 544 f, err := os.OpenFile(nativePath, os.O_RDWR, 0644) 545 if os.IsNotExist(err) { 546 return nil, moerr.NewFileNotFoundNoCtx(path.File) 547 } 548 return &LocalETLFSMutator{ 549 osFile: f, 550 }, nil 551 } 552 553 type LocalETLFSMutator struct { 554 osFile *os.File 555 } 556 557 func (l *LocalETLFSMutator) Mutate(ctx context.Context, entries ...IOEntry) error { 558 return l.mutate(ctx, 0, entries...) 559 } 560 561 func (l *LocalETLFSMutator) Append(ctx context.Context, entries ...IOEntry) error { 562 offset, err := l.osFile.Seek(0, io.SeekEnd) 563 if err != nil { 564 return err 565 } 566 return l.mutate(ctx, offset, entries...) 567 } 568 569 func (l *LocalETLFSMutator) mutate(ctx context.Context, baseOffset int64, entries ...IOEntry) error { 570 select { 571 case <-ctx.Done(): 572 return ctx.Err() 573 default: 574 } 575 576 // write 577 for _, entry := range entries { 578 579 if entry.ReaderForWrite != nil { 580 // seek and copy 581 _, err := l.osFile.Seek(int64(entry.Offset+baseOffset), 0) 582 if err != nil { 583 return err 584 } 585 n, err := io.Copy(l.osFile, entry.ReaderForWrite) 586 if err != nil { 587 return err 588 } 589 if n != entry.Size { 590 return moerr.NewSizeNotMatchNoCtx("") 591 } 592 593 } else { 594 // WriteAt 595 n, err := l.osFile.WriteAt(entry.Data, int64(entry.Offset+baseOffset)) 596 if err != nil { 597 return err 598 } 599 if int64(n) != entry.Size { 600 return moerr.NewSizeNotMatchNoCtx("") 601 } 602 } 603 604 } 605 606 return nil 607 } 608 609 func (l *LocalETLFSMutator) Close() error { 610 // sync 611 if err := l.osFile.Sync(); err != nil { 612 return err 613 } 614 615 // close 616 if err := l.osFile.Close(); err != nil { 617 return err 618 } 619 620 return nil 621 }