github.com/djmaze/goofys@v0.24.2/internal/dir.go (about) 1 // Copyright 2015 - 2017 Ka-Hing Cheung 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package internal 16 17 import ( 18 "fmt" 19 "sort" 20 "strings" 21 "sync" 22 "sync/atomic" 23 "syscall" 24 "time" 25 26 "github.com/aws/aws-sdk-go/aws" 27 28 "github.com/jacobsa/fuse" 29 "github.com/jacobsa/fuse/fuseops" 30 "github.com/jacobsa/fuse/fuseutil" 31 ) 32 33 type DirInodeData struct { 34 cloud StorageBackend 35 mountPrefix string 36 37 // these 2 refer to readdir of the Children 38 lastOpenDir *DirInodeData 39 lastOpenDirIdx int 40 seqOpenDirScore uint8 41 DirTime time.Time 42 43 Children []*Inode 44 } 45 46 type DirHandleEntry struct { 47 Name string 48 Inode fuseops.InodeID 49 Type fuseutil.DirentType 50 Offset fuseops.DirOffset 51 } 52 53 // Returns true if any char in `inp` has a value < '/'. 54 // This should work for unicode also: unicode chars are all greater than 128. 55 // See TestHasCharLtSlash for examples. 56 func hasCharLtSlash(inp string) bool { 57 for _, c := range inp { 58 if c < '/' { 59 return true 60 } 61 } 62 return false 63 } 64 65 // Gets the name of the blob/prefix from a full cloud path. 66 // See TestCloudPathToName for examples. 
67 func cloudPathToName(inp string) string { 68 inp = strings.TrimRight(inp, "/") 69 split := strings.Split(inp, "/") 70 return split[len(split)-1] 71 } 72 73 // Returns true if the last prefix's name or last item's name from the given 74 // ListBlobsOutput has a character less than '/' 75 // See TestShouldFetchNextListBlobsPage for examples. 76 func shouldFetchNextListBlobsPage(resp *ListBlobsOutput) bool { 77 if !resp.IsTruncated { 78 // There is no next page. 79 return false 80 } 81 numPrefixes := len(resp.Prefixes) 82 numItems := len(resp.Items) 83 if numPrefixes > 0 && 84 hasCharLtSlash(cloudPathToName(*resp.Prefixes[numPrefixes-1].Prefix)) { 85 return true 86 } else if numItems > 0 && 87 hasCharLtSlash(cloudPathToName(*resp.Items[numItems-1].Key)) { 88 return true 89 } 90 return false 91 } 92 93 type DirHandle struct { 94 inode *Inode 95 96 mu sync.Mutex // everything below is protected by mu 97 98 Marker *string 99 lastFromCloud *string 100 done bool 101 // Time at which we started fetching child entries 102 // from cloud for this handle. 103 refreshStartTime time.Time 104 } 105 106 func NewDirHandle(inode *Inode) (dh *DirHandle) { 107 dh = &DirHandle{inode: inode} 108 return 109 } 110 111 func (inode *Inode) OpenDir() (dh *DirHandle) { 112 inode.logFuse("OpenDir") 113 var isS3 bool 114 115 parent := inode.Parent 116 cloud, _ := inode.cloud() 117 118 // in test we sometimes set cloud to nil to ensure we are not 119 // talking to the cloud 120 if cloud != nil { 121 _, isS3 = cloud.Delegate().(*S3Backend) 122 } 123 124 dir := inode.dir 125 if dir == nil { 126 panic(fmt.Sprintf("%v is not a directory", inode.FullName())) 127 } 128 129 if isS3 && parent != nil && inode.fs.flags.TypeCacheTTL != 0 { 130 parent.mu.Lock() 131 defer parent.mu.Unlock() 132 133 numChildren := len(parent.dir.Children) 134 dirIdx := -1 135 seqMode := false 136 firstDir := false 137 138 if parent.dir.lastOpenDir == nil { 139 // check if we are opening the first child 140 // (after . 
and ..) cap the search to 1000 141 // peers to bound the time. If the next dir is 142 // more than 1000 away, slurping isn't going 143 // to be helpful anyway 144 for i := 2; i < MinInt(numChildren, 1000); i++ { 145 c := parent.dir.Children[i] 146 if c.isDir() { 147 if *c.Name == *inode.Name { 148 dirIdx = i 149 seqMode = true 150 firstDir = true 151 } 152 break 153 } 154 } 155 } else { 156 // check if we are reading the next one as expected 157 for i := parent.dir.lastOpenDirIdx + 1; i < MinInt(numChildren, 1000); i++ { 158 c := parent.dir.Children[i] 159 if c.isDir() { 160 if *c.Name == *inode.Name { 161 dirIdx = i 162 seqMode = true 163 } 164 break 165 } 166 } 167 } 168 169 if seqMode { 170 if parent.dir.seqOpenDirScore < 255 { 171 parent.dir.seqOpenDirScore++ 172 } 173 if parent.dir.seqOpenDirScore == 2 { 174 fuseLog.Debugf("%v in readdir mode", *parent.FullName()) 175 } 176 parent.dir.lastOpenDir = dir 177 parent.dir.lastOpenDirIdx = dirIdx 178 if firstDir { 179 // 1) if I open a/, root's score = 1 180 // (a is the first dir), so make a/'s 181 // count at 1 too this allows us to 182 // propagate down the score for 183 // depth-first search case 184 wasSeqMode := dir.seqOpenDirScore >= 2 185 dir.seqOpenDirScore = parent.dir.seqOpenDirScore 186 if !wasSeqMode && dir.seqOpenDirScore >= 2 { 187 fuseLog.Debugf("%v in readdir mode", *inode.FullName()) 188 } 189 } 190 } else { 191 parent.dir.seqOpenDirScore = 0 192 if dirIdx == -1 { 193 dirIdx = parent.findChildIdxUnlocked(*inode.Name) 194 } 195 if dirIdx != -1 { 196 parent.dir.lastOpenDir = dir 197 parent.dir.lastOpenDirIdx = dirIdx 198 } 199 } 200 } 201 202 dh = NewDirHandle(inode) 203 return 204 } 205 206 func (dh *DirHandle) listObjectsSlurp(prefix string) (resp *ListBlobsOutput, err error) { 207 var marker *string 208 reqPrefix := prefix 209 inode := dh.inode 210 211 cloud, key := inode.cloud() 212 213 if dh.inode.Parent != nil { 214 inode = dh.inode.Parent 215 var parentCloud StorageBackend 216 parentCloud, 
reqPrefix = inode.cloud() 217 if parentCloud != cloud { 218 err = fmt.Errorf("cannot slurp across cloud provider") 219 return 220 } 221 222 if len(reqPrefix) != 0 { 223 reqPrefix += "/" 224 } 225 marker = &key 226 if len(*marker) != 0 { 227 *marker += "/" 228 } 229 } 230 231 params := &ListBlobsInput{ 232 Prefix: &reqPrefix, 233 StartAfter: marker, 234 } 235 236 resp, err = cloud.ListBlobs(params) 237 if err != nil { 238 s3Log.Errorf("ListObjects %v = %v", params, err) 239 return 240 } 241 242 num := len(resp.Items) 243 if num == 0 { 244 return 245 } 246 247 inode.mu.Lock() 248 inode.fs.mu.Lock() 249 250 dirs := make(map[*Inode]bool) 251 for _, obj := range resp.Items { 252 baseName := (*obj.Key)[len(reqPrefix):] 253 254 slash := strings.Index(baseName, "/") 255 if slash != -1 { 256 inode.insertSubTree(baseName, &obj, dirs) 257 } 258 } 259 inode.fs.mu.Unlock() 260 inode.mu.Unlock() 261 262 for d, sealed := range dirs { 263 if d == dh.inode { 264 // never seal the current dir because that's 265 // handled at upper layer 266 continue 267 } 268 269 if sealed || !resp.IsTruncated { 270 d.dir.DirTime = time.Now() 271 d.Attributes.Mtime = d.findChildMaxTime() 272 } 273 } 274 275 if resp.IsTruncated { 276 obj := resp.Items[len(resp.Items)-1] 277 // if we are done listing prefix, we are good 278 if strings.HasPrefix(*obj.Key, prefix) { 279 // if we are done with all the slashes, then we are good 280 baseName := (*obj.Key)[len(prefix):] 281 282 for _, c := range baseName { 283 if c <= '/' { 284 // if an entry is ex: a!b, then the 285 // next entry could be a/foo, so we 286 // are not done yet. 
287 resp = nil 288 break 289 } 290 } 291 } 292 } 293 294 // we only return this response if we are totally done with listing this dir 295 if resp != nil { 296 resp.IsTruncated = false 297 resp.NextContinuationToken = nil 298 } 299 300 return 301 } 302 303 func (dh *DirHandle) listObjects(prefix string) (resp *ListBlobsOutput, err error) { 304 errSlurpChan := make(chan error, 1) 305 slurpChan := make(chan ListBlobsOutput, 1) 306 errListChan := make(chan error, 1) 307 listChan := make(chan ListBlobsOutput, 1) 308 309 fs := dh.inode.fs 310 311 // try to list without delimiter to see if we can slurp up 312 // multiple directories 313 parent := dh.inode.Parent 314 315 if dh.Marker == nil && 316 fs.flags.TypeCacheTTL != 0 && 317 (parent != nil && parent.dir.seqOpenDirScore >= 2) { 318 go func() { 319 resp, err := dh.listObjectsSlurp(prefix) 320 if err != nil { 321 errSlurpChan <- err 322 } else if resp != nil { 323 slurpChan <- *resp 324 } else { 325 errSlurpChan <- fuse.EINVAL 326 } 327 }() 328 } else { 329 errSlurpChan <- fuse.EINVAL 330 } 331 332 listObjectsFlat := func() { 333 params := &ListBlobsInput{ 334 Delimiter: aws.String("/"), 335 ContinuationToken: dh.Marker, 336 Prefix: &prefix, 337 } 338 339 cloud, _ := dh.inode.cloud() 340 341 resp, err := listBlobsSafe(cloud, params) 342 if err != nil { 343 errListChan <- err 344 } else { 345 listChan <- *resp 346 } 347 } 348 349 if !fs.flags.Cheap { 350 // invoke the fallback in parallel if desired 351 go listObjectsFlat() 352 } 353 354 // first see if we get anything from the slurp 355 select { 356 case resp := <-slurpChan: 357 return &resp, nil 358 case err = <-errSlurpChan: 359 } 360 361 if fs.flags.Cheap { 362 listObjectsFlat() 363 } 364 365 // if we got an error (which may mean slurp is not applicable, 366 // wait for regular list 367 select { 368 case resp := <-listChan: 369 return &resp, nil 370 case err = <-errListChan: 371 return 372 } 373 } 374 375 // Sorting order of entries in directories is slightly 
inconsistent between goofys 376 // and azblob, s3. This inconsistency can be a problem if the listing involves 377 // multiple pagination results. Call this instead of `cloud.ListBlobs` if you are 378 // paginating. 379 // 380 // Problem: In s3 & azblob, prefixes are returned with '/' => the prefix "2019" is 381 // returned as "2019/". So the list api for these backends returns "2019/" after 382 // "2019-0001/" because ascii("/") > ascii("-"). This is problematic for goofys if 383 // "2019/" is returned in x+1'th batch and "2019-0001/" is returned in x'th; Goofys 384 // stores the results as they arrive in a sorted array and expects backends to return 385 // entries in a sorted order. 386 // We cant just use ordering of s3/azblob because different cloud providers have 387 // different sorting strategies when it involes directories. In s3 "a/" > "a-b/". 388 // In adlv2 it is opposite. 389 // 390 // Solution: To deal with this our solution with follows (for all backends). For 391 // a single call of ListBlobs, we keep requesting multiple list batches until there 392 // is nothing left to list or the last listed entry has all characters > "/" 393 // Relavant test case: TestReadDirDash 394 func listBlobsSafe(cloud StorageBackend, param *ListBlobsInput) (*ListBlobsOutput, error) { 395 res, err := cloud.ListBlobs(param) 396 if err != nil { 397 return nil, err 398 } 399 400 for shouldFetchNextListBlobsPage(res) { 401 nextReq := &ListBlobsInput{ 402 // Inherit Prefix, Delimiter, MaxKeys from original request. 403 Prefix: param.Prefix, 404 Delimiter: param.Delimiter, 405 MaxKeys: param.MaxKeys, 406 // Get the continuation token from the result. 407 ContinuationToken: res.NextContinuationToken, 408 } 409 nextRes, err := cloud.ListBlobs(nextReq) 410 if err != nil { 411 return nil, err 412 } 413 414 res = &ListBlobsOutput{ 415 // Add new items and prefixes. 
416 Prefixes: append(res.Prefixes, nextRes.Prefixes...), 417 Items: append(res.Items, nextRes.Items...), 418 // Inherit NextContinuationToken, IsTruncated from nextRes. 419 NextContinuationToken: nextRes.NextContinuationToken, 420 IsTruncated: nextRes.IsTruncated, 421 // We no longer have a single request. This is composite request. Concatenate 422 // new request id to exiting. 423 RequestId: res.RequestId + ", " + nextRes.RequestId, 424 } 425 } 426 return res, nil 427 } 428 429 // LOCKS_REQUIRED(dh.mu) 430 // LOCKS_EXCLUDED(dh.inode.mu) 431 // LOCKS_EXCLUDED(dh.inode.fs) 432 func (dh *DirHandle) ReadDir(offset fuseops.DirOffset) (en *DirHandleEntry, err error) { 433 en, ok := dh.inode.readDirFromCache(offset) 434 if ok { 435 return 436 } 437 438 parent := dh.inode 439 fs := parent.fs 440 441 // the dir expired, so we need to fetch from the cloud. there 442 // maybe static directories that we want to keep, so cloud 443 // listing should not overwrite them. here's what we do: 444 // 445 // 1. list from cloud and add them all to the tree, remember 446 // which one we added last 447 // 448 // 2. serve from cache 449 // 450 // 3. when we serve the entry we added last, signal that next 451 // time we need to list from cloud again with continuation 452 // token 453 for dh.lastFromCloud == nil && !dh.done { 454 if dh.Marker == nil { 455 // Marker, lastFromCloud are nil => We just started 456 // refreshing this directory info from cloud. 
457 dh.refreshStartTime = time.Now() 458 } 459 dh.mu.Unlock() 460 461 var prefix string 462 _, prefix = dh.inode.cloud() 463 if len(prefix) != 0 { 464 prefix += "/" 465 } 466 467 resp, err := dh.listObjects(prefix) 468 if err != nil { 469 dh.mu.Lock() 470 return nil, err 471 } 472 473 s3Log.Debug(resp) 474 dh.mu.Lock() 475 parent.mu.Lock() 476 fs.mu.Lock() 477 478 // this is only returned for non-slurped responses 479 for _, dir := range resp.Prefixes { 480 // strip trailing / 481 dirName := (*dir.Prefix)[0 : len(*dir.Prefix)-1] 482 // strip previous prefix 483 dirName = dirName[len(prefix):] 484 if len(dirName) == 0 { 485 continue 486 } 487 488 if inode := parent.findChildUnlocked(dirName); inode != nil { 489 now := time.Now() 490 // don't want to update time if this 491 // inode is setup to never expire 492 if inode.AttrTime.Before(now) { 493 inode.AttrTime = now 494 } 495 } else { 496 inode := NewInode(fs, parent, &dirName) 497 inode.ToDir() 498 fs.insertInode(parent, inode) 499 // these are fake dir entries, we will 500 // realize the refcnt when lookup is 501 // done 502 inode.refcnt = 0 503 } 504 505 dh.lastFromCloud = &dirName 506 } 507 508 for _, obj := range resp.Items { 509 if !strings.HasPrefix(*obj.Key, prefix) { 510 // other slurped objects that we cached 511 continue 512 } 513 514 baseName := (*obj.Key)[len(prefix):] 515 516 slash := strings.Index(baseName, "/") 517 if slash == -1 { 518 if len(baseName) == 0 { 519 // shouldn't happen 520 continue 521 } 522 523 inode := parent.findChildUnlocked(baseName) 524 if inode == nil { 525 inode = NewInode(fs, parent, &baseName) 526 // these are fake dir entries, 527 // we will realize the refcnt 528 // when lookup is done 529 inode.refcnt = 0 530 fs.insertInode(parent, inode) 531 } 532 inode.SetFromBlobItem(&obj) 533 } else { 534 // this is a slurped up object which 535 // was already cached 536 baseName = baseName[:slash] 537 } 538 539 if dh.lastFromCloud == nil || 540 strings.Compare(*dh.lastFromCloud, 
baseName) < 0 { 541 dh.lastFromCloud = &baseName 542 } 543 } 544 545 parent.mu.Unlock() 546 fs.mu.Unlock() 547 548 if resp.IsTruncated { 549 dh.Marker = resp.NextContinuationToken 550 } else { 551 dh.Marker = nil 552 dh.done = true 553 break 554 } 555 } 556 557 parent.mu.Lock() 558 defer parent.mu.Unlock() 559 560 // Find the first non-stale child inode with offset >= 561 // `offset`. A stale inode is one that existed before the 562 // first ListBlobs for this dir handle, but is not being 563 // written to (ie: not a new file) 564 var child *Inode 565 for int(offset) < len(parent.dir.Children) { 566 // Note on locking: See comments at Inode::AttrTime, Inode::Parent. 567 childTmp := parent.dir.Children[offset] 568 if atomic.LoadInt32(&childTmp.fileHandles) == 0 && 569 childTmp.AttrTime.Before(dh.refreshStartTime) { 570 // childTmp.AttrTime < dh.refreshStartTime => the child entry was not 571 // updated from cloud by this dir Handle. 572 // So this is a stale entry that should be removed. 573 childTmp.Parent = nil 574 parent.removeChildUnlocked(childTmp) 575 } else { 576 // Found a non-stale child inode. 
577 child = childTmp 578 break 579 } 580 } 581 582 if child == nil { 583 // we've reached the end 584 parent.dir.DirTime = time.Now() 585 parent.Attributes.Mtime = parent.findChildMaxTime() 586 return nil, nil 587 } 588 589 en = &DirHandleEntry{ 590 Name: *child.Name, 591 Inode: child.Id, 592 Offset: fuseops.DirOffset(offset) + 1, 593 } 594 if child.isDir() { 595 en.Type = fuseutil.DT_Directory 596 } else { 597 en.Type = fuseutil.DT_File 598 } 599 600 if dh.lastFromCloud != nil && en.Name == *dh.lastFromCloud { 601 dh.lastFromCloud = nil 602 } 603 return en, nil 604 } 605 606 func (dh *DirHandle) CloseDir() error { 607 return nil 608 } 609 610 // prefix and newPrefix should include the trailing / 611 // return all the renamed objects 612 func (dir *Inode) renameChildren(cloud StorageBackend, prefix string, 613 newParent *Inode, newPrefix string) (err error) { 614 615 var copied []string 616 var res *ListBlobsOutput 617 618 for true { 619 param := ListBlobsInput{ 620 Prefix: &prefix, 621 } 622 if res != nil { 623 param.ContinuationToken = res.NextContinuationToken 624 } 625 626 // No need to call listBlobsSafe here because we are reading the results directly 627 // unlike ReadDir which reads the results and stores it in dir object. 628 res, err = cloud.ListBlobs(¶m) 629 if err != nil { 630 return 631 } 632 633 if len(res.Items) == 0 { 634 return 635 } 636 637 if copied == nil { 638 copied = make([]string, 0, len(res.Items)) 639 } 640 641 // after the server side copy, we want to delete all the files 642 // using multi-delete, which is capped to 1000 on aws. 
If we 643 // are going to make an arbitrary limit that sounds like a 644 // good one (and we want to have an arbitrary limit because we 645 // don't want to rename a million objects here) 646 total := len(copied) + len(res.Items) 647 if total > 1000 || total == 1000 && res.IsTruncated { 648 return syscall.E2BIG 649 } 650 651 // say dir is "/a/dir" and it has "1", "2", "3", and we are 652 // moving it to "/b/" items will be a/dir/1, a/dir/2, a/dir/3, 653 // and we will copy them to b/1, b/2, b/3 respectively 654 for _, i := range res.Items { 655 key := (*i.Key)[len(prefix):] 656 657 // TODO: coordinate with underlining copy and do this in parallel 658 _, err = cloud.CopyBlob(&CopyBlobInput{ 659 Source: *i.Key, 660 Destination: newPrefix + key, 661 Size: &i.Size, 662 ETag: i.ETag, 663 StorageClass: i.StorageClass, 664 }) 665 if err != nil { 666 return err 667 } 668 669 copied = append(copied, *i.Key) 670 } 671 672 if !res.IsTruncated { 673 break 674 } 675 } 676 677 s3Log.Debugf("rename copied %v", copied) 678 _, err = cloud.DeleteBlobs(&DeleteBlobsInput{Items: copied}) 679 return err 680 } 681 682 // Recursively resets the DirTime for child directories. 683 // ACQUIRES_LOCK(inode.mu) 684 func (inode *Inode) resetDirTimeRec() { 685 inode.mu.Lock() 686 if inode.dir == nil { 687 inode.mu.Unlock() 688 return 689 } 690 inode.dir.DirTime = time.Time{} 691 // Make a copy of the child nodes before giving up the lock. 692 // This protects us from any addition/removal of child nodes 693 // under this node. 694 children := make([]*Inode, len(inode.dir.Children)) 695 copy(children, inode.dir.Children) 696 inode.mu.Unlock() 697 for _, child := range children { 698 child.resetDirTimeRec() 699 } 700 } 701 702 // ResetForUnmount resets the Inode as part of unmounting a storage backend 703 // mounted at the given inode. 704 // ACQUIRES_LOCK(inode.mu) 705 func (inode *Inode) ResetForUnmount() { 706 if inode.dir == nil { 707 panic(fmt.Sprintf("ResetForUnmount called on a non-directory. 
name:%v", 708 inode.Name)) 709 } 710 711 inode.mu.Lock() 712 // First reset the cloud info for this directory. After that, any read and 713 // write operations under this directory will not know about this cloud. 714 inode.dir.cloud = nil 715 inode.dir.mountPrefix = "" 716 717 // Clear metadata. 718 // Set the metadata values to nil instead of deleting them so that 719 // we know to fetch them again next time instead of thinking there's 720 // no metadata 721 inode.userMetadata = nil 722 inode.s3Metadata = nil 723 inode.Attributes = InodeAttributes{} 724 inode.Invalid, inode.ImplicitDir = false, false 725 inode.mu.Unlock() 726 // Reset DirTime for recursively for this node and all its child nodes. 727 // Note: resetDirTimeRec should be called without holding the lock. 728 inode.resetDirTimeRec() 729 730 } 731 732 func (parent *Inode) findPath(path string) (inode *Inode) { 733 dir := parent 734 735 for dir != nil { 736 if !dir.isDir() { 737 return nil 738 } 739 740 idx := strings.Index(path, "/") 741 if idx == -1 { 742 return dir.findChild(path) 743 } 744 dirName := path[0:idx] 745 path = path[idx+1:] 746 747 dir = dir.findChild(dirName) 748 } 749 750 return nil 751 } 752 753 func (parent *Inode) findChild(name string) (inode *Inode) { 754 parent.mu.Lock() 755 defer parent.mu.Unlock() 756 757 inode = parent.findChildUnlocked(name) 758 return 759 } 760 761 func (parent *Inode) findInodeFunc(name string) func(i int) bool { 762 return func(i int) bool { 763 return (*parent.dir.Children[i].Name) >= name 764 } 765 } 766 767 func (parent *Inode) findChildUnlocked(name string) (inode *Inode) { 768 l := len(parent.dir.Children) 769 if l == 0 { 770 return 771 } 772 i := sort.Search(l, parent.findInodeFunc(name)) 773 if i < l { 774 // found 775 if *parent.dir.Children[i].Name == name { 776 inode = parent.dir.Children[i] 777 } 778 } 779 return 780 } 781 782 func (parent *Inode) findChildIdxUnlocked(name string) int { 783 l := len(parent.dir.Children) 784 if l == 0 { 785 return 
-1 786 } 787 i := sort.Search(l, parent.findInodeFunc(name)) 788 if i < l && *parent.dir.Children[i].Name == name { 789 return i 790 } 791 return -1 792 } 793 794 func (parent *Inode) removeChildUnlocked(inode *Inode) { 795 l := len(parent.dir.Children) 796 if l == 0 { 797 return 798 } 799 i := sort.Search(l, parent.findInodeFunc(*inode.Name)) 800 if i >= l || *parent.dir.Children[i].Name != *inode.Name { 801 panic(fmt.Sprintf("%v.removeName(%v) but child not found: %v", 802 *parent.FullName(), *inode.Name, i)) 803 } 804 805 copy(parent.dir.Children[i:], parent.dir.Children[i+1:]) 806 parent.dir.Children[l-1] = nil 807 parent.dir.Children = parent.dir.Children[:l-1] 808 809 if cap(parent.dir.Children)-len(parent.dir.Children) > 20 { 810 tmp := make([]*Inode, len(parent.dir.Children)) 811 copy(tmp, parent.dir.Children) 812 parent.dir.Children = tmp 813 } 814 } 815 816 func (parent *Inode) removeChild(inode *Inode) { 817 parent.mu.Lock() 818 defer parent.mu.Unlock() 819 820 parent.removeChildUnlocked(inode) 821 return 822 } 823 824 func (parent *Inode) insertChild(inode *Inode) { 825 parent.mu.Lock() 826 defer parent.mu.Unlock() 827 828 parent.insertChildUnlocked(inode) 829 } 830 831 func (parent *Inode) insertChildUnlocked(inode *Inode) { 832 l := len(parent.dir.Children) 833 if l == 0 { 834 parent.dir.Children = []*Inode{inode} 835 return 836 } 837 838 i := sort.Search(l, parent.findInodeFunc(*inode.Name)) 839 if i == l { 840 // not found = new value is the biggest 841 parent.dir.Children = append(parent.dir.Children, inode) 842 } else { 843 if *parent.dir.Children[i].Name == *inode.Name { 844 panic(fmt.Sprintf("double insert of %v", parent.getChildName(*inode.Name))) 845 } 846 847 parent.dir.Children = append(parent.dir.Children, nil) 848 copy(parent.dir.Children[i+1:], parent.dir.Children[i:]) 849 parent.dir.Children[i] = inode 850 } 851 } 852 853 func (parent *Inode) LookUp(name string) (inode *Inode, err error) { 854 parent.logFuse("Inode.LookUp", name) 855 856 
inode, err = parent.LookUpInodeMaybeDir(name, parent.getChildName(name)) 857 if err != nil { 858 return nil, err 859 } 860 861 return 862 } 863 864 func (parent *Inode) getChildName(name string) string { 865 if parent.Id == fuseops.RootInodeID { 866 return name 867 } else { 868 return fmt.Sprintf("%v/%v", *parent.FullName(), name) 869 } 870 } 871 872 func (parent *Inode) Unlink(name string) (err error) { 873 parent.logFuse("Unlink", name) 874 875 cloud, key := parent.cloud() 876 key = appendChildName(key, name) 877 878 _, err = cloud.DeleteBlob(&DeleteBlobInput{ 879 Key: key, 880 }) 881 if err == fuse.ENOENT { 882 // this might have been deleted out of band 883 err = nil 884 } 885 if err != nil { 886 return 887 } 888 889 parent.mu.Lock() 890 defer parent.mu.Unlock() 891 892 inode := parent.findChildUnlocked(name) 893 if inode != nil { 894 parent.removeChildUnlocked(inode) 895 inode.Parent = nil 896 } 897 898 return 899 } 900 901 func (parent *Inode) Create( 902 name string, metadata fuseops.OpMetadata) (inode *Inode, fh *FileHandle) { 903 904 parent.logFuse("Create", name) 905 906 fs := parent.fs 907 908 parent.mu.Lock() 909 defer parent.mu.Unlock() 910 911 now := time.Now() 912 inode = NewInode(fs, parent, &name) 913 inode.Attributes = InodeAttributes{ 914 Size: 0, 915 Mtime: now, 916 } 917 918 fh = NewFileHandle(inode, metadata) 919 fh.poolHandle = fs.bufferPool 920 fh.dirty = true 921 inode.fileHandles = 1 922 923 parent.touch() 924 925 return 926 } 927 928 func (parent *Inode) MkDir( 929 name string) (inode *Inode, err error) { 930 931 parent.logFuse("MkDir", name) 932 933 fs := parent.fs 934 935 cloud, key := parent.cloud() 936 key = appendChildName(key, name) 937 if !cloud.Capabilities().DirBlob { 938 key += "/" 939 } 940 params := &PutBlobInput{ 941 Key: key, 942 Body: nil, 943 DirBlob: true, 944 } 945 946 _, err = cloud.PutBlob(params) 947 if err != nil { 948 return 949 } 950 951 parent.mu.Lock() 952 defer parent.mu.Unlock() 953 954 inode = NewInode(fs, 
parent, &name) 955 inode.ToDir() 956 inode.touch() 957 if parent.Attributes.Mtime.Before(inode.Attributes.Mtime) { 958 parent.Attributes.Mtime = inode.Attributes.Mtime 959 } 960 961 return 962 } 963 964 func appendChildName(parent, child string) string { 965 if len(parent) != 0 { 966 parent += "/" 967 } 968 return parent + child 969 } 970 971 func (parent *Inode) isEmptyDir(fs *Goofys, name string) (isDir bool, err error) { 972 cloud, key := parent.cloud() 973 key = appendChildName(key, name) + "/" 974 975 resp, err := cloud.ListBlobs(&ListBlobsInput{ 976 Delimiter: aws.String("/"), 977 MaxKeys: PUInt32(2), 978 Prefix: &key, 979 }) 980 if err != nil { 981 return false, mapAwsError(err) 982 } 983 984 if len(resp.Prefixes) > 0 || len(resp.Items) > 1 { 985 err = fuse.ENOTEMPTY 986 isDir = true 987 return 988 } 989 990 if len(resp.Items) == 1 { 991 isDir = true 992 993 if *resp.Items[0].Key != key { 994 err = fuse.ENOTEMPTY 995 } 996 } 997 998 return 999 } 1000 1001 func (parent *Inode) RmDir(name string) (err error) { 1002 parent.logFuse("Rmdir", name) 1003 1004 isDir, err := parent.isEmptyDir(parent.fs, name) 1005 if err != nil { 1006 return 1007 } 1008 // if this was an implicit dir, isEmptyDir would have returned 1009 // isDir = false 1010 if isDir { 1011 cloud, key := parent.cloud() 1012 key = appendChildName(key, name) + "/" 1013 1014 params := DeleteBlobInput{ 1015 Key: key, 1016 } 1017 1018 _, err = cloud.DeleteBlob(¶ms) 1019 if err != nil { 1020 return 1021 } 1022 } 1023 1024 // we know this entry is gone 1025 parent.mu.Lock() 1026 defer parent.mu.Unlock() 1027 1028 inode := parent.findChildUnlocked(name) 1029 if inode != nil { 1030 parent.removeChildUnlocked(inode) 1031 inode.Parent = nil 1032 } 1033 1034 return 1035 } 1036 1037 // semantic of rename: 1038 // rename("any", "not_exists") = ok 1039 // rename("file1", "file2") = ok 1040 // rename("empty_dir1", "empty_dir2") = ok 1041 // rename("nonempty_dir1", "empty_dir2") = ok 1042 // rename("nonempty_dir1", 
"nonempty_dir2") = ENOTEMPTY 1043 // rename("file", "dir") = EISDIR 1044 // rename("dir", "file") = ENOTDIR 1045 func (parent *Inode) Rename(from string, newParent *Inode, to string) (err error) { 1046 parent.logFuse("Rename", from, newParent.getChildName(to)) 1047 1048 fromCloud, fromPath := parent.cloud() 1049 toCloud, toPath := newParent.cloud() 1050 if fromCloud != toCloud { 1051 // cannot rename across cloud backend 1052 err = fuse.EINVAL 1053 return 1054 } 1055 1056 fromFullName := appendChildName(fromPath, from) 1057 fs := parent.fs 1058 1059 var size *uint64 1060 var fromIsDir bool 1061 var toIsDir bool 1062 var renameChildren bool 1063 1064 fromIsDir, err = parent.isEmptyDir(fs, from) 1065 if err != nil { 1066 if err == fuse.ENOTEMPTY { 1067 renameChildren = true 1068 } else { 1069 return 1070 } 1071 } 1072 1073 toFullName := appendChildName(toPath, to) 1074 1075 toIsDir, err = parent.isEmptyDir(fs, to) 1076 if err != nil { 1077 return 1078 } 1079 1080 if fromIsDir && !toIsDir { 1081 _, err = fromCloud.HeadBlob(&HeadBlobInput{ 1082 Key: toFullName, 1083 }) 1084 if err == nil { 1085 return fuse.ENOTDIR 1086 } else { 1087 err = mapAwsError(err) 1088 if err != fuse.ENOENT { 1089 return 1090 } 1091 } 1092 } else if !fromIsDir && toIsDir { 1093 return syscall.EISDIR 1094 } 1095 1096 if fromIsDir { 1097 fromFullName += "/" 1098 toFullName += "/" 1099 size = PUInt64(0) 1100 } 1101 1102 if renameChildren && !fromCloud.Capabilities().DirBlob { 1103 err = parent.renameChildren(fromCloud, fromFullName, 1104 newParent, toFullName) 1105 if err != nil { 1106 return 1107 } 1108 } else { 1109 err = parent.renameObject(fs, size, fromFullName, toFullName) 1110 } 1111 return 1112 } 1113 1114 func (parent *Inode) renameObject(fs *Goofys, size *uint64, fromFullName string, toFullName string) (err error) { 1115 cloud, _ := parent.cloud() 1116 1117 _, err = cloud.RenameBlob(&RenameBlobInput{ 1118 Source: fromFullName, 1119 Destination: toFullName, 1120 }) 1121 if err == nil || 
err != syscall.ENOTSUP {
		return
	}

	_, err = cloud.CopyBlob(&CopyBlobInput{
		Source:      fromFullName,
		Destination: toFullName,
		Size:        size,
	})
	if err != nil {
		return
	}

	_, err = cloud.DeleteBlob(&DeleteBlobInput{
		Key: fromFullName,
	})
	if err != nil {
		return
	}
	s3Log.Debugf("Deleted %v", fromFullName)

	return
}

// isParentOf reports whether parent is an ancestor of inode, i.e. whether
// parent appears anywhere on inode's Parent chain. Despite the name it is
// not limited to the direct parent: the check recurses up through
// inode.Parent until it finds parent or runs out of ancestors.
func (parent *Inode) isParentOf(inode *Inode) bool {
	return inode.Parent != nil && (parent == inode.Parent || parent.isParentOf(inode.Parent))
}

// sealPastDirs updates the bookkeeping map dirs (directory -> sealed?) after
// an entry belonging to directory d has been seen.
//
// if I had seen a/ and a/b, and now I get a/c, that means a/b is
// done, but not a/
//
// Every tracked directory that is not d itself, is not already sealed and is
// not an ancestor of d is marked sealed (true). d is then recorded as
// unsealed (false).
func sealPastDirs(dirs map[*Inode]bool, d *Inode) {
	for p, sealed := range dirs {
		if p != d && !sealed && !p.isParentOf(d) {
			dirs[p] = true
		}
	}
	// I just read something in d, obviously it's not done yet
	dirs[d] = false
}

// insertSubTree inserts (or refreshes) the inode described by obj under
// parent. path is the slash-separated name of obj relative to parent, and
// dirs is the sealed/unsealed directory map maintained by sealPastDirs.
//
// Three shapes of path are handled:
//   - "name"      (no slash): a leaf child of parent.
//   - "dir/"      (nothing after the first slash): obj is the blob for the
//     directory "dir" itself.
//   - "dir/rest"  : "dir" is created as a directory if it is missing (or
//     converted to one if it exists as a non-directory), and the
//     function recurses into it with "rest".
//
// Both locks are held again on return, but they are temporarily released
// (and, in the recursive case, the child's lock is taken) while the function
// runs, so callers must not assume state guarded by them is unchanged.
//
// LOCKS_REQUIRED(parent.mu)
// LOCKS_REQUIRED(parent.fs.mu)
func (parent *Inode) insertSubTree(path string, obj *BlobItemOutput, dirs map[*Inode]bool) {
	fs := parent.fs
	slash := strings.Index(path, "/")
	if slash == -1 {
		inode := parent.findChildUnlocked(path)
		if inode == nil {
			// Brand new inode: populated while still holding
			// parent.mu and fs.mu (see the note in the else branch).
			inode = NewInode(fs, parent, &path)
			inode.refcnt = 0
			fs.insertInode(parent, inode)
			inode.SetFromBlobItem(obj)
		} else {
			// our locking order is most specific lock
			// first, ie: lock a/b before a/. But here we
			// already have a/ and also global lock. For
			// new inode we don't care about that
			// violation because no one else will take
			// that lock anyway
			//
			// For an existing inode both locks are dropped around
			// the update and re-acquired afterwards.
			// NOTE(review): presumably SetFromBlobItem takes
			// inode.mu itself — confirm.
			fs.mu.Unlock()
			parent.mu.Unlock()
			inode.SetFromBlobItem(obj)
			parent.mu.Lock()
			fs.mu.Lock()
		}
		sealPastDirs(dirs, parent)
	} else {
		dir := path[:slash]
		path = path[slash+1:]

		if len(path) == 0 {
			// "dir/": obj describes the directory itself.
			inode := parent.findChildUnlocked(dir)
			if inode == nil {
				inode = NewInode(fs, parent, &dir)
				inode.ToDir()
				inode.refcnt = 0
				fs.insertInode(parent, inode)
				inode.SetFromBlobItem(obj)
			} else if !inode.isDir() {
				// Previously known as a non-directory: convert it.
				inode.ToDir()
				fs.addDotAndDotDot(inode)
			} else {
				fs.mu.Unlock()
				parent.mu.Unlock()
				inode.SetFromBlobItem(obj)
				parent.mu.Lock()
				fs.mu.Lock()
			}
			sealPastDirs(dirs, inode)
		} else {
			// ensure that the potentially implicit dir is added
			inode := parent.findChildUnlocked(dir)
			if inode == nil {
				inode = NewInode(fs, parent, &dir)
				inode.ToDir()
				inode.refcnt = 0
				fs.insertInode(parent, inode)
			} else if !inode.isDir() {
				inode.ToDir()
				fs.addDotAndDotDot(inode)
			}
			// Bump the attribute timestamp to "now" unless it is
			// already later than that.
			now := time.Now()
			if inode.AttrTime.Before(now) {
				inode.AttrTime = now
			}

			// mark this dir but don't seal anything else
			// until we get to the leaf
			dirs[inode] = false

			// Hand over from (parent.mu, fs.mu) to (inode.mu, fs.mu)
			// for the recursive call, then restore the caller's
			// locks. The statement order below is deliberate; do
			// not reorder.
			fs.mu.Unlock()
			parent.mu.Unlock()
			inode.mu.Lock()
			fs.mu.Lock()
			inode.insertSubTree(path, obj, dirs)
			inode.mu.Unlock()
			fs.mu.Unlock()
			parent.mu.Lock()
			fs.mu.Lock()
		}
	}
}

// findChildMaxTime returns the latest Mtime among parent's own attributes
// and those of its children. The first two entries of parent.dir.Children
// are skipped because they are "." and "..".
func (parent *Inode) findChildMaxTime() time.Time {
	maxTime := parent.Attributes.Mtime

	for i, c := range parent.dir.Children {
		if i < 2 {
			// skip . and ..
			continue
		}
		if c.Attributes.Mtime.After(maxTime) {
			maxTime = c.Attributes.Mtime
		}
	}

	return maxTime
}

// readDirFromCache serves a single directory entry from the in-memory child
// list. It takes parent.mu for the duration of the call.
//
// ok is true when the cached listing has not expired (DirTime checked
// against the TypeCacheTTL flag); when ok is false the caller gets no entry.
// With ok true, en is the child at index offset, or nil when offset is at or
// past the end of the child list. The returned entry's Offset is offset+1,
// i.e. the position of the next entry.
//
// Panics (with the inode's full name) if parent is not a directory.
func (parent *Inode) readDirFromCache(offset fuseops.DirOffset) (en *DirHandleEntry, ok bool) {
	parent.mu.Lock()
	defer parent.mu.Unlock()

	if parent.dir == nil {
		panic(*parent.FullName())
	}
	if !expired(parent.dir.DirTime, parent.fs.flags.TypeCacheTTL) {
		ok = true

		if int(offset) >= len(parent.dir.Children) {
			// cache is valid but there is nothing at this offset
			return
		}
		child := parent.dir.Children[offset]

		en = &DirHandleEntry{
			Name:   *child.Name,
			Inode:  child.Id,
			Offset: offset + 1,
		}
		if child.isDir() {
			en.Type = fuseutil.DT_Directory
		} else {
			en.Type = fuseutil.DT_File
		}

	}
	return
}

// LookUpInodeNotDir issues a HeadBlob for the child called name under parent
// and delivers exactly one message: the response on c, or the error (passed
// through mapAwsError) on errc. It is meant to be run in its own goroutine;
// the channels need enough buffer space for the send not to block if the
// receiver has already gone away.
func (parent *Inode) LookUpInodeNotDir(name string, c chan HeadBlobOutput, errc chan error) {
	cloud, key := parent.cloud()
	key = appendChildName(key, name)
	params := &HeadBlobInput{Key: key}
	resp, err := cloud.HeadBlob(params)
	if err != nil {
		errc <- mapAwsError(err)
		return
	}

	s3Log.Debug(resp)
	c <- *resp
}

// LookUpInodeDir checks whether name exists as a directory under parent by
// listing at most one key under the prefix "<key of name>/" with "/" as the
// delimiter. It delivers exactly one message: the listing on c, or the error
// on errc. It is meant to be run in its own goroutine.
//
// NOTE(review): unlike LookUpInodeNotDir the error is forwarded without
// going through mapAwsError — confirm that ListBlobs implementations already
// return mapped errors.
func (parent *Inode) LookUpInodeDir(name string, c chan ListBlobsOutput, errc chan error) {
	cloud, key := parent.cloud()
	key = appendChildName(key, name) + "/"

	resp, err := cloud.ListBlobs(&ListBlobsInput{
		Delimiter: aws.String("/"),
		MaxKeys:   PUInt32(1),
		Prefix:    &key,
	})

	if err != nil {
		errc <- err
		return
	}

	s3Log.Debug(resp)
	c <- *resp
}

// LookUpInodeMaybeDir resolves the child called name under parent without
// knowing in advance whether it is a file or a directory. fullName is only
// used in debug log messages.
//
// Up to three probes are used:
//
//	[0] HEAD <name>    - a regular object
//	[1] HEAD <name>/   - an explicit directory blob
//	[2] LIST <name>/   - an implicit directory (anything stored below it)
//
// When the backend reports the DirBlob capability only probe [0] is issued
// and its outcome is returned directly. Otherwise, with the Cheap flag off
// the probes are started concurrently (the LIST is skipped when ExplicitDir
// is set); with Cheap on they are started one after another, each only after
// the previous one has failed. The first probe that finds something wins.
//
// If every probe fails, the first error that is not fuse.ENOENT is returned
// (note: a probe slot that holds no error at all also counts as "not
// ENOENT"); otherwise fuse.ENOENT is returned.
//
// returned inode has nil Id
//
// Panics if parent has no cloud backend.
func (parent *Inode) LookUpInodeMaybeDir(name string, fullName string) (inode *Inode, err error) {
	errObjectChan := make(chan error, 1)
	// capacity 2: both HEAD probes report success on this channel
	objectChan := make(chan HeadBlobOutput, 2)
	errDirBlobChan := make(chan error, 1)
	// The LIST channels stay nil until a LIST is actually started; a
	// receive from a nil channel never becomes ready in the select below.
	var errDirChan chan error
	var dirChan chan ListBlobsOutput

	// checking counts probes that have not failed yet; checkErr holds the
	// per-probe outcome, indexed as in the doc comment above.
	checking := 3
	var checkErr [3]error

	cloud, key := parent.cloud()
	if cloud == nil {
		panic("s3 disabled")
	}
	// Key a directory blob for this child would have. This mirrors the
	// prefix LookUpInodeDir lists with, so it also matches when parent is
	// not at the top of the key space.
	dirBlobKey := appendChildName(key, name) + "/"

	go parent.LookUpInodeNotDir(name, objectChan, errObjectChan)
	if !cloud.Capabilities().DirBlob && !parent.fs.flags.Cheap {
		go parent.LookUpInodeNotDir(name+"/", objectChan, errDirBlobChan)
		if !parent.fs.flags.ExplicitDir {
			errDirChan = make(chan error, 1)
			dirChan = make(chan ListBlobsOutput, 1)
			go parent.LookUpInodeDir(name, dirChan, errDirChan)
		}
	}

	for {
		select {
		case resp := <-objectChan:
			err = nil
			inode = NewInode(parent.fs, parent, &name)
			if !resp.IsDirBlob {
				// XXX/TODO if both object and object/ exists, return dir
				inode.SetFromBlobItem(&resp.BlobItemOutput)
			} else {
				inode.ToDir()
				if resp.LastModified != nil {
					inode.Attributes.Mtime = *resp.LastModified
				}
			}
			inode.fillXattrFromHead(&resp)
			return
		case err = <-errObjectChan:
			checking--
			checkErr[0] = err
			s3Log.Debugf("HEAD %v = %v", fullName, err)
		case resp := <-dirChan:
			err = nil
			if len(resp.Prefixes) != 0 || len(resp.Items) != 0 {
				inode = NewInode(parent.fs, parent, &name)
				inode.ToDir()
				// The listing returns keys under the prefix that
				// LookUpInodeDir built from parent's key, so compare
				// against that same key rather than the bare name.
				if len(resp.Items) != 0 && *resp.Items[0].Key == dirBlobKey {
					// it's actually a dir blob
					entry := resp.Items[0]
					if entry.ETag != nil {
						inode.s3Metadata["etag"] = []byte(*entry.ETag)
					}
					if entry.StorageClass != nil {
						inode.s3Metadata["storage-class"] = []byte(*entry.StorageClass)
					}

				}
				// if cheap is not on, the dir blob
				// could exist but this returned first
				if inode.fs.flags.Cheap {
					inode.ImplicitDir = true
				}
				return
			} else {
				// empty listing: nothing lives under <name>/
				checkErr[2] = fuse.ENOENT
				checking--
			}
		case err = <-errDirChan:
			checking--
			checkErr[2] = err
			s3Log.Debugf("LIST %v/ = %v", fullName, err)
		case err = <-errDirBlobChan:
			checking--
			checkErr[1] = err
			s3Log.Debugf("HEAD %v/ = %v", fullName, err)
		}

		// Backends with native dir blobs only ever run probe [0]; its
		// failure is final.
		if cloud.Capabilities().DirBlob {
			return
		}

		switch checking {
		case 2:
			// Cheap mode: first probe failed, now try the dir blob.
			if parent.fs.flags.Cheap {
				go parent.LookUpInodeNotDir(name+"/", objectChan, errDirBlobChan)
			}
		case 1:
			if parent.fs.flags.ExplicitDir {
				// No LIST is ever issued with ExplicitDir, so
				// account for it as not-found and finish.
				checkErr[2] = fuse.ENOENT
				goto doneCase
			} else if parent.fs.flags.Cheap {
				// Cheap mode: both HEADs failed, fall back to LIST.
				errDirChan = make(chan error, 1)
				dirChan = make(chan ListBlobsOutput, 1)
				go parent.LookUpInodeDir(name, dirChan, errDirChan)
			}
			break
		doneCase:
			fallthrough
		case 0:
			for _, e := range checkErr {
				if e != fuse.ENOENT {
					err = e
					return
				}
			}

			err = fuse.ENOENT
			return
		}
	}
}