github.com/yandex-cloud/geesefs@v0.40.9/internal/handles.go (about)

     1  // Copyright 2015 - 2017 Ka-Hing Cheung
     2  // Copyright 2021 Yandex LLC
     3  //
     4  // Licensed under the Apache License, Version 2.0 (the "License");
     5  // you may not use this file except in compliance with the License.
     6  // You may obtain a copy of the License at
     7  //
     8  //     http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  package internal
    17  
    18  import (
    19  	"bytes"
    20  	"context"
    21  	"encoding/json"
    22  	"fmt"
    23  	"net/url"
    24  	"os"
    25  	"sort"
    26  	"strconv"
    27  	"strings"
    28  	"sync"
    29  	"sync/atomic"
    30  	"syscall"
    31  	"time"
    32  
    33  	"github.com/jacobsa/fuse/fuseops"
    34  
    35  	"github.com/sirupsen/logrus"
    36  
    37  	"github.com/yandex-cloud/geesefs/internal/cfg"
    38  )
    39  
// Values of Inode.CacheState.
//
// The numeric order is significant: Inode.DeRef forgets an inode whose
// reference count reaches zero only when its state is <= ST_DEAD, i.e.
// ST_CACHED or ST_DEAD.
const (
	// ST_CACHED is the zero value of Inode.CacheState; SetXattr and RemoveXattr
	// switch an inode from this state to ST_MODIFIED.
	ST_CACHED int32 = 0
	// ST_DEAD is dumped as "dead"; xattr changes on such inodes fail with ENOENT.
	ST_DEAD int32 = 1
	// ST_CREATED is dumped as "created".
	ST_CREATED int32 = 2
	// ST_MODIFIED is dumped as "modified".
	ST_MODIFIED int32 = 3
	// ST_DELETED is dumped as "deleted"; xattr changes on such inodes fail with ENOENT.
	ST_DELETED int32 = 4
)
    47  
// NodeId is the type of Inode.owner, one of the "Cluster Mode" fields.
// NOTE(review): presumably identifies a node of a geesefs cluster — confirm.
type NodeId uint64
    49  
// Joinable is implemented by anything that exposes Join(ctx).
// NOTE(review): presumably Join blocks until the object finishes or ctx is
// done — confirm against the implementations.
type Joinable interface {
	Join(ctx context.Context) error
}
    53  
// MountedFS has the same Join method as Joinable plus Unmount.
// NOTE(review): presumably represents a mounted file system instance —
// confirm against the implementations.
type MountedFS interface {
	Join(ctx context.Context) error
	Unmount() error
}
    58  
// InodeAttributes is the attribute set kept in Inode.Attributes.
// Inode.InflateAttributes converts it into fuseops.InodeAttributes.
type InodeAttributes struct {
	Size  uint64
	Mtime time.Time // a zero Mtime is replaced by the root Mtime in InflateAttributes
	Ctime time.Time // also reported as Atime by InflateAttributes
	Uid   uint32
	Gid   uint32
	Rdev  uint32      // filled from the rdev metadata attribute when Mode has os.ModeDevice
	Mode  os.FileMode // Go file mode (permission and type bits)
}
    68  
// ReadRange is an element of Inode.readRanges.
// DumpThis prints each range as [flushing (0/1), Offset, Size].
// NOTE(review): presumably Offset/Size are in bytes within the file — confirm.
type ReadRange struct {
	Offset uint64
	Size uint64
	Flushing bool
}
    74  
// MPUPart is not referenced in this part of the file.
// NOTE(review): presumably describes one uploaded part of a multipart
// upload (number, byte range and ETag) — confirm against its users.
type MPUPart struct {
	Num uint32
	Offset uint64
	Size uint64
	ETag string
}
    81  
// Inode is the in-memory state of one file system entry: its identity,
// attributes, cached/buffered data, xattrs and directory data (when dir != nil).
type Inode struct {
	Id         fuseops.InodeID
	Name       string
	fs         *Goofys
	Attributes InodeAttributes
	// It is generally safe to read `AttrTime` without locking because if some other
	// operation is modifying `AttrTime`, in most cases the reader is okay with working with
	// stale data. But Time is a struct and modifying it is not atomic. However
	// in practice (until the year 2157) we should be okay because
	// - Almost all uses of AttrTime will be about comparisons (AttrTime < x, AttrTime > x)
	// - Time object will have Time::monotonic bit set (until the year 2157) => the time
	//   comparison just compares Time::ext field
	// Ref: https://github.com/golang/go/blob/e42ae65a8507/src/time/time.go#L12:L56
	AttrTime time.Time
	// ExpireTime is only moved forward by SetExpireTime, which also keeps
	// fs.inodesByTime (keyed by ExpireTime.Unix()) in sync with it.
	ExpireTime time.Time

	mu sync.Mutex // everything below is protected by mu
	readCond *sync.Cond
	pauseWriters int

	// We are not very consistent about enforcing locks for `Parent` because, the
	// parent field very very rarely changes and it is generally fine to operate on
	// stale parent information
	Parent *Inode

	// dir is non-nil for directories (see isDir and ToDir)
	dir *DirInodeData

	// fileHandles is incremented atomically by OpenFile
	fileHandles int32
	lastWriteEnd uint64

	// cached/buffered data
	CacheState int32 // one of the ST_* constants
	dirtyQueueId uint64
	buffers BufferList
	readRanges []ReadRange
	DiskFDQueueID uint64
	DiskCacheFD *os.File
	OnDisk bool
	forceFlush bool
	IsFlushing int
	flushError error
	flushErrorTime time.Time
	readError error
	// renamed from: parent, name
	oldParent *Inode
	oldName string
	// is already being renamed to the current name
	renamingTo bool

	// multipart upload state
	mpu *MultipartBlobCommitInput

	// userMetadataDirty is set to 2 when user metadata is changed locally and
	// reset to 0 when metadata is reloaded by SetFromBlobItem
	userMetadataDirty int
	// userMetadata is nil until loaded (see fillXattr); exposed as "user.*" xattrs
	userMetadata map[string][]byte
	// s3Metadata holds "etag" and "storage-class"; exposed under the backend-name xattr prefix
	s3Metadata   map[string][]byte

	// last known size and etag from the cloud
	knownSize uint64
	knownETag string

	// the refcnt is an exception, it's protected with atomic access
	// being part of parent.dir.Children increases refcnt by 1
	refcnt int64

	// Cluster Mode

	ownerMu    sync.RWMutex
	ownerTerm  uint64
	owner      NodeId
	readyOwner bool
}
   153  
   154  func NewInode(fs *Goofys, parent *Inode, name string) (inode *Inode) {
   155  	if strings.Index(name, "/") != -1 {
   156  		fuseLog.Errorf("%v is not a valid name", name)
   157  	}
   158  
   159  	inode = &Inode{
   160  		Name:       name,
   161  		fs:         fs,
   162  		Attributes: InodeAttributes{
   163  			Uid:    fs.flags.Uid,
   164  			Gid:    fs.flags.Gid,
   165  			Mode:   fs.flags.FileMode,
   166  		},
   167  		AttrTime:   time.Now(),
   168  		Parent:     parent,
   169  		s3Metadata: make(map[string][]byte),
   170  		refcnt:     0,
   171  	}
   172  
   173  	inode.buffers.helpers = inode
   174  
   175  	return
   176  }
   177  
   178  // For BufferListHelpers
   179  func (inode *Inode) PartNum(offset uint64) uint64 {
   180  	return inode.fs.partNum(offset)
   181  }
   182  
   183  // For BufferListHelpers
   184  func (inode *Inode) QueueCleanBuffer(buf *FileBuffer) {
   185  	inode.fs.cleanQueue.Add(inode, buf)
   186  }
   187  
   188  // For BufferListHelpers
   189  func (inode *Inode) UnqueueCleanBuffer(buf *FileBuffer) {
   190  	inode.fs.cleanQueue.Delete(buf)
   191  }
   192  
   193  // LOCKS_EXCLUDED(inode.mu)
   194  func (inode *Inode) SetFromBlobItem(item *BlobItemOutput) {
   195  	inode.mu.Lock()
   196  	defer inode.mu.Unlock()
   197  
   198  	patchInProgress := inode.fs.flags.UsePatch && inode.mpu == nil && inode.CacheState == ST_MODIFIED && inode.IsFlushing > 0
   199  	// We always just drop our local cache when inode size or etag changes remotely
   200  	// It's the simplest method of conflict resolution
   201  	// Otherwise we may not be able to make a correct object version
   202  	//
   203  	// If ongoing patch requests exist, then concurrent etag changes is normal. In current implementation
   204  	// it is hard to reliably distinguish actual data conflicts from concurrent patch updates.
   205  	if !patchInProgress && (item.ETag != nil && inode.knownETag != *item.ETag || item.Size != inode.knownSize) {
   206  		if inode.CacheState != ST_CACHED && (inode.knownETag != "" || inode.knownSize > 0) {
   207  			s3Log.Warnf("Conflict detected (inode %v): server-side ETag or size of %v"+
   208  				" (%v, %v) differs from local (%v, %v). File is changed remotely, dropping cache",
   209  				inode.Id, inode.FullName(), NilStr(item.ETag), item.Size, inode.knownETag, inode.knownSize)
   210  		}
   211  		inode.resetCache()
   212  		inode.Attributes.Size = item.Size
   213  		inode.knownSize = item.Size
   214  		if item.LastModified != nil {
   215  			inode.Attributes.Mtime = *item.LastModified
   216  			inode.Attributes.Ctime = *item.LastModified
   217  		} else {
   218  			inode.Attributes.Mtime = inode.fs.rootAttrs.Ctime
   219  			inode.Attributes.Ctime = inode.fs.rootAttrs.Ctime
   220  		}
   221  		if item.Metadata != nil {
   222  			inode.setMetadata(item.Metadata)
   223  			inode.userMetadataDirty = 0
   224  		}
   225  	}
   226  	if item.ETag != nil {
   227  		inode.s3Metadata["etag"] = []byte(*item.ETag)
   228  		inode.knownETag = *item.ETag
   229  	} else {
   230  		delete(inode.s3Metadata, "etag")
   231  	}
   232  	if item.StorageClass != nil {
   233  		inode.s3Metadata["storage-class"] = []byte(*item.StorageClass)
   234  	} else {
   235  		delete(inode.s3Metadata, "storage-class")
   236  	}
   237  	now := time.Now()
   238  	// don't want to update time if this inode is setup to never expire
   239  	if inode.AttrTime.Before(now) {
   240  		inode.SetAttrTime(now)
   241  	}
   242  }
   243  
   244  // LOCKS_REQUIRED(inode.mu)
   245  func (inode *Inode) cloud() (cloud StorageBackend, path string) {
   246  	var prefix string
   247  	var dir *Inode
   248  
   249  	if inode.dir == nil {
   250  		path = inode.Name
   251  		dir = inode.Parent
   252  	} else {
   253  		dir = inode
   254  	}
   255  
   256  	for p := dir; p != nil; p = p.Parent {
   257  		if p.dir.cloud != nil {
   258  			cloud = p.dir.cloud
   259  			// the error backend produces a mount.err file
   260  			// at the root and is not aware of prefix
   261  			_, isErr := cloud.(StorageBackendInitError)
   262  			if !isErr {
   263  				// we call init here instead of
   264  				// relying on the wrapper to call init
   265  				// because we want to return the right
   266  				// prefix
   267  				if c, ok := cloud.(*StorageBackendInitWrapper); ok {
   268  					err := c.Init("")
   269  					isErr = err != nil
   270  				}
   271  			}
   272  
   273  			if !isErr {
   274  				prefix = p.dir.mountPrefix
   275  			}
   276  			break
   277  		}
   278  
   279  		if path == "" {
   280  			path = p.Name
   281  		} else if p.Parent != nil {
   282  			// don't prepend if I am already the root node
   283  			path = p.Name + "/" + path
   284  		}
   285  	}
   286  
   287  	if path == "" {
   288  		path = strings.TrimRight(prefix, "/")
   289  	} else {
   290  		path = prefix + path
   291  	}
   292  	return
   293  }
   294  
   295  func (inode *Inode) FullName() string {
   296  	if inode.Parent == nil {
   297  		return inode.Name
   298  	} else {
   299  		return inode.Parent.getChildName(inode.Name)
   300  	}
   301  }
   302  
   303  func (inode *Inode) touch() {
   304  	inode.Attributes.Mtime = time.Now()
   305  	inode.Attributes.Ctime = time.Now()
   306  }
   307  
   308  func (inode *Inode) InflateAttributes() (attr fuseops.InodeAttributes) {
   309  	mtime := inode.Attributes.Mtime
   310  	if mtime.IsZero() {
   311  		mtime = inode.fs.rootAttrs.Mtime
   312  	}
   313  
   314  	attr = fuseops.InodeAttributes{
   315  		Size:   inode.Attributes.Size,
   316  		Atime:  inode.Attributes.Ctime,
   317  		Mtime:  mtime,
   318  		Ctime:  inode.Attributes.Ctime,
   319  		Crtime: mtime,
   320  		Uid:    inode.Attributes.Uid,
   321  		Gid:    inode.Attributes.Gid,
   322  		Mode:   inode.Attributes.Mode,
   323  		Rdev:   inode.Attributes.Rdev,
   324  	}
   325  
   326  	if inode.dir != nil {
   327  		attr.Nlink = 2
   328  		attr.Mode = attr.Mode & os.ModePerm | os.ModeDir
   329  	} else if inode.userMetadata != nil && inode.userMetadata[inode.fs.flags.SymlinkAttr] != nil {
   330  		attr.Nlink = 1
   331  		attr.Mode = attr.Mode & os.ModePerm | os.ModeSymlink
   332  	} else {
   333  		attr.Nlink = 1
   334  	}
   335  
   336  	return
   337  }
   338  
   339  func (inode *Inode) logFuse(op string, args ...interface{}) {
   340  	if fuseLog.Level >= logrus.DebugLevel {
   341  		fuseLog.Debugln(op, inode.Id, inode.FullName(), args)
   342  	}
   343  }
   344  
   345  func (inode *Inode) errFuse(op string, args ...interface{}) {
   346  	fuseLog.Errorln(op, inode.Id, inode.FullName(), args)
   347  }
   348  
   349  func (inode *Inode) ToDir() {
   350  	if inode.dir == nil {
   351  		inode.Attributes = InodeAttributes{
   352  			Size: 4096,
   353  			Uid:  inode.Attributes.Uid,
   354  			Gid:  inode.Attributes.Gid,
   355  			Mode: inode.fs.flags.DirMode | os.ModeDir,
   356  			// Ctime, Mtime intentionally not initialized
   357  		}
   358  		inode.dir = &DirInodeData{
   359  			lastOpenDirIdx: -1,
   360  		}
   361  	}
   362  }
   363  
   364  func (inode *Inode) Ref() {
   365  	res := atomic.AddInt64(&inode.refcnt, 1)
   366  	inode.logFuse("Ref", res)
   367  	return
   368  }
   369  
   370  // LOCKS_REQUIRED(inode.mu)
   371  // LOCKS_EXCLUDED(fs.mu)
   372  func (inode *Inode) DeRef(n int64) (stale bool) {
   373  	res := atomic.AddInt64(&inode.refcnt, -n)
   374  	if res < 0 {
   375  		fuseLog.Errorf("Deref underflow: deref inode %v (%v) by %v from %v", inode.Id, inode.FullName(), n, res+n)
   376  		atomic.StoreInt64(&inode.refcnt, 0)
   377  		res = 0
   378  	} else {
   379  		inode.logFuse("DeRef", n, res)
   380  	}
   381  	if res == 0 && inode.CacheState <= ST_DEAD {
   382  		inode.resetCache()
   383  		inode.fs.mu.Lock()
   384  		inode.resetExpireTime()
   385  		delete(inode.fs.inodes, inode.Id)
   386  		inode.fs.forgotCnt += 1
   387  		inode.fs.mu.Unlock()
   388  	}
   389  	return res == 0
   390  }
   391  
   392  // LOCKS_REQUIRED(inode.mu)
   393  // LOCKS_EXCLUDED(inode.fs.mu)
   394  func (inode *Inode) SetAttrTime(tm time.Time) {
   395  	inode.AttrTime = tm
   396  	// Expire when at least both AttrTime+TTL & ExpireTime pass
   397  	// AttrTime is required for Windows where we don't use SetExpireTime()
   398  	inode.SetExpireTime(tm.Add(inode.fs.flags.StatCacheTTL))
   399  }
   400  
   401  // LOCKS_REQUIRED(inode.mu)
   402  // LOCKS_EXCLUDED(inode.fs.mu)
   403  func (inode *Inode) SetExpireTime(tm time.Time) {
   404  	// Only rewind expire time forward. I.e. it's more ExtendExpireTime than SetExpireTime
   405  	if inode.ExpireTime.After(tm) {
   406  		return
   407  	}
   408  	oldTime := inode.ExpireTime.Unix()
   409  	newTime := tm.Unix()
   410  	inode.ExpireTime = tm
   411  	inode.fs.mu.Lock()
   412  	oldMap := inode.fs.inodesByTime[oldTime]
   413  	if oldMap != nil {
   414  		delete(oldMap, inode.Id)
   415  		if len(oldMap) == 0 {
   416  			delete(inode.fs.inodesByTime, oldTime)
   417  		}
   418  	}
   419  	if !tm.IsZero() {
   420  		newMap := inode.fs.inodesByTime[newTime]
   421  		if newMap == nil {
   422  			newMap = make(map[fuseops.InodeID]bool)
   423  			inode.fs.inodesByTime[newTime] = newMap
   424  		}
   425  		newMap[inode.Id] = true
   426  	}
   427  	inode.fs.mu.Unlock()
   428  }
   429  
   430  // LOCKS_REQUIRED(inode.mu)
   431  // LOCKS_REQUIRED(inode.fs.mu)
   432  func (inode *Inode) resetExpireTime() {
   433  	oldTime := inode.ExpireTime.Unix()
   434  	inode.ExpireTime = time.Time{}
   435  	oldMap := inode.fs.inodesByTime[oldTime]
   436  	if oldMap != nil {
   437  		delete(oldMap, inode.Id)
   438  		if len(oldMap) == 0 {
   439  			delete(inode.fs.inodesByTime, oldTime)
   440  		}
   441  	}
   442  }
   443  
   444  // LOCKS_EXCLUDED(inode.mu)
   445  // LOCKS_EXCLUDED(inode.fs.mu)
   446  func (inode *Inode) SetExpireLocked(tm time.Time) {
   447  	inode.mu.Lock()
   448  	inode.SetExpireTime(tm)
   449  	inode.mu.Unlock()
   450  }
   451  
   452  // LOCKS_EXCLUDED(inode.mu)
   453  func (inode *Inode) GetAttributes() *fuseops.InodeAttributes {
   454  	inode.mu.Lock()
   455  	attr := inode.InflateAttributes()
   456  	inode.mu.Unlock()
   457  	return &attr
   458  }
   459  
   460  func (inode *Inode) isDir() bool {
   461  	return inode.dir != nil
   462  }
   463  
   464  func RetryHeadBlob(flags *cfg.FlagStorage, cloud StorageBackend, req *HeadBlobInput) (resp *HeadBlobOutput, err error) {
   465  	ReadBackoff(flags, func(attempt int) error {
   466  		resp, err = cloud.HeadBlob(req)
   467  		if err != nil && shouldRetry(err) {
   468  			s3Log.Warnf("Error getting metadata of %v (attempt %v): %v\n", req.Key, attempt, err)
   469  		}
   470  		return err
   471  	})
   472  	return
   473  }
   474  
   475  // LOCKS_REQUIRED(inode.mu)
   476  func (inode *Inode) fillXattrFromHead(resp *HeadBlobOutput) {
   477  	if resp.ETag != nil {
   478  		inode.s3Metadata["etag"] = []byte(*resp.ETag)
   479  	}
   480  	if resp.StorageClass != nil {
   481  		inode.s3Metadata["storage-class"] = []byte(*resp.StorageClass)
   482  	} else {
   483  		inode.s3Metadata["storage-class"] = []byte("STANDARD")
   484  	}
   485  
   486  	inode.setMetadata(resp.Metadata)
   487  }
   488  
   489  // LOCKS_REQUIRED(inode.mu)
   490  func (inode *Inode) setUserMeta(key string, value []byte) error {
   491  	if inode.userMetadata == nil {
   492  		if value == nil {
   493  			return nil
   494  		}
   495  		err := inode.fillXattr()
   496  		if err != nil {
   497  			return err
   498  		}
   499  	}
   500  	oldValue, exists := inode.userMetadata[key]
   501  	if value == nil {
   502  		if !exists {
   503  			return nil
   504  		}
   505  		delete(inode.userMetadata, key)
   506  	} else {
   507  		if exists && bytes.Compare(oldValue, value) == 0 {
   508  			return nil
   509  		}
   510  		inode.userMetadata[key] = value
   511  	}
   512  	inode.userMetadataDirty = 2
   513  	return nil
   514  }
   515  
   516  // LOCKS_REQUIRED(inode.mu)
   517  func (inode *Inode) setMetadata(metadata map[string]*string) {
   518  	inode.userMetadata = unescapeMetadata(metadata)
   519  	if inode.userMetadata != nil {
   520  		if inode.fs.flags.EnableMtime {
   521  			mtimeStr := inode.userMetadata[inode.fs.flags.MtimeAttr]
   522  			if mtimeStr != nil {
   523  				i, err := strconv.ParseUint(string(mtimeStr), 0, 64)
   524  				if err == nil {
   525  					inode.Attributes.Mtime = time.Unix(int64(i), 0)
   526  				}
   527  			}
   528  		}
   529  		if inode.fs.flags.EnablePerms {
   530  			uidStr := inode.userMetadata[inode.fs.flags.UidAttr]
   531  			if uidStr != nil {
   532  				i, err := strconv.ParseUint(string(uidStr), 0, 32)
   533  				if err == nil {
   534  					inode.Attributes.Uid = uint32(i)
   535  				}
   536  			}
   537  			gidStr := inode.userMetadata[inode.fs.flags.GidAttr]
   538  			if gidStr != nil {
   539  				i, err := strconv.ParseUint(string(gidStr), 0, 32)
   540  				if err == nil {
   541  					inode.Attributes.Gid = uint32(i)
   542  				}
   543  			}
   544  		}
   545  		if inode.fs.flags.EnablePerms || inode.fs.flags.EnableSpecials {
   546  			modeStr := inode.userMetadata[inode.fs.flags.FileModeAttr]
   547  			if modeStr != nil {
   548  				i, err := strconv.ParseUint(string(modeStr), 0, 32)
   549  				if err == nil {
   550  					fm := fuseops.ConvertFileMode(uint32(i))
   551  					var mask os.FileMode
   552  					if inode.fs.flags.EnablePerms {
   553  						mask = os.ModePerm
   554  					}
   555  					if inode.fs.flags.EnableSpecials && (inode.Attributes.Mode & os.ModeType) == 0 {
   556  						mask = mask | os.ModeType
   557  					}
   558  					rmMask := (os.ModePerm | os.ModeType) ^ mask
   559  					inode.Attributes.Mode = inode.Attributes.Mode & rmMask | (fm & mask)
   560  					if (inode.Attributes.Mode & os.ModeDevice) != 0 {
   561  						rdev, _ := strconv.ParseUint(string(inode.userMetadata[inode.fs.flags.RdevAttr]), 0, 32)
   562  						inode.Attributes.Rdev = uint32(rdev)
   563  					}
   564  				}
   565  			}
   566  		}
   567  	}
   568  }
   569  
   570  func (inode *Inode) setFileMode(newMode os.FileMode) (changed bool, err error) {
   571  	prevMode := inode.Attributes.Mode
   572  	if inode.fs.flags.EnableSpecials {
   573  		if (newMode & os.ModeDir) != (inode.Attributes.Mode & os.ModeDir) {
   574  			if (newMode & os.ModeDir) != 0 {
   575  				return false, syscall.ENOTDIR
   576  			} else {
   577  				return false, syscall.EISDIR
   578  			}
   579  		}
   580  		inode.Attributes.Mode = (inode.Attributes.Mode & os.ModePerm) | (newMode & os.ModeType)
   581  	}
   582  	if inode.fs.flags.EnablePerms {
   583  		inode.Attributes.Mode = (inode.Attributes.Mode & os.ModeType) | (newMode & os.ModePerm)
   584  	}
   585  	changed = (prevMode != inode.Attributes.Mode)
   586  	var defaultMode os.FileMode
   587  	if inode.dir != nil {
   588  		defaultMode = inode.fs.flags.DirMode | os.ModeDir
   589  	} else {
   590  		defaultMode = inode.fs.flags.FileMode
   591  	}
   592  	if (inode.Attributes.Mode & os.ModeDevice) != 0 {
   593  		err = inode.setUserMeta(inode.fs.flags.RdevAttr, []byte(fmt.Sprintf("%d", inode.Attributes.Rdev)))
   594  		if err != nil {
   595  			return
   596  		}
   597  	}
   598  	if inode.Attributes.Mode != defaultMode {
   599  		err = inode.setUserMeta(inode.fs.flags.FileModeAttr, []byte(fmt.Sprintf("%d", fuseops.ConvertGolangMode(inode.Attributes.Mode))))
   600  	} else {
   601  		err = inode.setUserMeta(inode.fs.flags.FileModeAttr, nil)
   602  	}
   603  	return
   604  }
   605  
   606  // FIXME: Move all these xattr-related functions to file.go
   607  
   608  // LOCKS_REQUIRED(inode.mu)
   609  func (inode *Inode) fillXattr() (err error) {
   610  	if inode.userMetadata != nil {
   611  		return nil
   612  	}
   613  	if inode.dir != nil && inode.dir.ImplicitDir {
   614  		inode.userMetadata = make(map[string][]byte)
   615  		return nil
   616  	}
   617  	cloud, key := inode.cloud()
   618  	if inode.oldParent != nil {
   619  		_, key = inode.oldParent.cloud()
   620  		key = appendChildName(key, inode.oldName)
   621  	}
   622  	if inode.isDir() {
   623  		key += "/"
   624  	}
   625  	inode.mu.Unlock()
   626  	resp, err := RetryHeadBlob(inode.fs.flags, cloud, &HeadBlobInput{Key: key})
   627  	inode.mu.Lock()
   628  	if err != nil {
   629  		err = mapAwsError(err)
   630  		if err == syscall.ENOENT {
   631  			err = nil
   632  			if inode.isDir() {
   633  				inode.dir.ImplicitDir = true
   634  			}
   635  		}
   636  		return err
   637  	} else if inode.userMetadata == nil {
   638  		inode.fillXattrFromHead(resp)
   639  	}
   640  	return
   641  }
   642  
   643  // LOCKS_REQUIRED(inode.mu)
   644  func (inode *Inode) getXattrMap(name string, userOnly bool) (
   645  	meta map[string][]byte, newName string, err error) {
   646  
   647  	cloud, _ := inode.cloud()
   648  	xattrPrefix := cloud.Capabilities().Name + "."
   649  
   650  	if strings.HasPrefix(name, xattrPrefix) {
   651  		if userOnly {
   652  			return nil, "", syscall.EPERM
   653  		}
   654  
   655  		newName = name[len(xattrPrefix):]
   656  		meta = inode.s3Metadata
   657  	} else if strings.HasPrefix(name, "user.") && name != "user."+inode.fs.flags.SymlinkAttr {
   658  		err = inode.fillXattr()
   659  		if err != nil {
   660  			return nil, "", err
   661  		}
   662  
   663  		newName = name[5:]
   664  		meta = inode.userMetadata
   665  	} else {
   666  		if userOnly {
   667  			return nil, "", syscall.EPERM
   668  		} else {
   669  			return nil, "", ENOATTR
   670  		}
   671  	}
   672  
   673  	if meta == nil {
   674  		return nil, "", ENOATTR
   675  	}
   676  
   677  	return
   678  }
   679  
   680  func escapeMetadata(meta map[string][]byte) (metadata map[string]*string) {
   681  	if meta == nil {
   682  		return
   683  	}
   684  	metadata = make(map[string]*string)
   685  	for k, v := range meta {
   686  		k = strings.ToLower(xattrEscape(k))
   687  		metadata[k] = PString(xattrEscape(string(v)))
   688  	}
   689  	return
   690  }
   691  
   692  func unescapeMetadata(meta map[string]*string) map[string][]byte {
   693  	unescaped := make(map[string][]byte)
   694  	for k, v := range meta {
   695  		uk, err := url.PathUnescape(strings.ToLower(k))
   696  		if err == nil {
   697  			uv, err := url.PathUnescape(*v)
   698  			if err == nil {
   699  				unescaped[uk] = []byte(uv)
   700  			}
   701  		}
   702  	}
   703  	return unescaped
   704  }
   705  
   706  func (inode *Inode) SetXattr(name string, value []byte, flags uint32) error {
   707  	inode.logFuse("SetXattr", name)
   708  
   709  	if name == "debug" {
   710  		inode.DumpTree(string(value) == "buffers")
   711  		return nil
   712  	}
   713  
   714  	inode.mu.Lock()
   715  	defer inode.mu.Unlock()
   716  
   717  	if inode.CacheState == ST_DELETED || inode.CacheState == ST_DEAD {
   718  		// Oops, it's a deleted file. We don't support changing invisible files
   719  		return syscall.ENOENT
   720  	}
   721  
   722  	meta, name, err := inode.getXattrMap(name, true)
   723  	if err == syscall.EPERM {
   724  		// Silently ignore forbidden xattr operations
   725  		return nil
   726  	}
   727  	if err != nil {
   728  		return err
   729  	}
   730  
   731  	if flags != 0x0 {
   732  		_, ok := meta[name]
   733  		if flags == XATTR_CREATE {
   734  			if ok {
   735  				return syscall.EEXIST
   736  			}
   737  		} else if flags == XATTR_REPLACE {
   738  			if !ok {
   739  				return ENOATTR
   740  			}
   741  		}
   742  	}
   743  
   744  	meta[name] = Dup(value)
   745  	inode.userMetadataDirty = 2
   746  	if inode.CacheState == ST_CACHED {
   747  		inode.SetCacheState(ST_MODIFIED)
   748  		inode.fs.WakeupFlusher()
   749  	}
   750  	return nil
   751  }
   752  
   753  func (inode *Inode) RemoveXattr(name string) error {
   754  	inode.logFuse("RemoveXattr", name)
   755  
   756  	inode.mu.Lock()
   757  	defer inode.mu.Unlock()
   758  
   759  	if inode.CacheState == ST_DELETED || inode.CacheState == ST_DEAD {
   760  		// Oops, it's a deleted file. We don't support changing invisible files
   761  		return syscall.ENOENT
   762  	}
   763  
   764  	meta, name, err := inode.getXattrMap(name, true)
   765  	if err == syscall.EPERM {
   766  		// Silently ignore forbidden xattr operations
   767  		return nil
   768  	}
   769  	if err != nil {
   770  		return err
   771  	}
   772  
   773  	if _, ok := meta[name]; ok {
   774  		delete(meta, name)
   775  		inode.userMetadataDirty = 2
   776  		if inode.CacheState == ST_CACHED {
   777  			inode.SetCacheState(ST_MODIFIED)
   778  			inode.fs.WakeupFlusher()
   779  		}
   780  		return err
   781  	} else {
   782  		return ENOATTR
   783  	}
   784  }
   785  
   786  func (inode *Inode) GetXattr(name string) ([]byte, error) {
   787  	inode.logFuse("GetXattr", name)
   788  	if name == "geesefs" {
   789  		return []byte(cfg.GEESEFS_VERSION), nil
   790  	}
   791  
   792  	inode.mu.Lock()
   793  	defer inode.mu.Unlock()
   794  
   795  	meta, name, err := inode.getXattrMap(name, false)
   796  	if err != nil {
   797  		return nil, err
   798  	}
   799  
   800  	value, ok := meta[name]
   801  	if ok {
   802  		return value, nil
   803  	} else {
   804  		return nil, ENOATTR
   805  	}
   806  }
   807  
   808  func (inode *Inode) ListXattr() ([]string, error) {
   809  	inode.mu.Lock()
   810  	defer inode.mu.Unlock()
   811  
   812  	var xattrs []string
   813  
   814  	err := inode.fillXattr()
   815  	if err != nil {
   816  		return nil, err
   817  	}
   818  
   819  	cloud, _ := inode.cloud()
   820  	cloudXattrPrefix := cloud.Capabilities().Name + "."
   821  
   822  	for k, _ := range inode.s3Metadata {
   823  		xattrs = append(xattrs, cloudXattrPrefix+k)
   824  	}
   825  
   826  	for k, _ := range inode.userMetadata {
   827  		xattrs = append(xattrs, "user."+k)
   828  	}
   829  
   830  	sort.Strings(xattrs)
   831  
   832  	return xattrs, nil
   833  }
   834  
   835  func (inode *Inode) OpenFile() (fh *FileHandle, err error) {
   836  	inode.mu.Lock()
   837  	defer inode.mu.Unlock()
   838  
   839  	fh = NewFileHandle(inode)
   840  
   841  	n := atomic.AddInt32(&inode.fileHandles, 1)
   842  	if n == 1 {
   843  		// This is done to try to protect directories with open files
   844  		inode.Parent.addModified(1)
   845  	}
   846  	return
   847  }
   848  
   849  func (inode *Inode) DumpTree(withBuffers bool) {
   850  	children := inode.DumpThis(withBuffers)
   851  	for _, child := range children {
   852  		child.DumpThis(withBuffers)
   853  	}
   854  }
   855  
   856  func (inode *Inode) DumpThis(withBuffers bool) (children []*Inode) {
   857  	inode.mu.Lock()
   858  	defer inode.mu.Unlock()
   859  
   860  	fs := inode.fs
   861  
   862  	dataMap := make(map[string]interface{})
   863  	dataMap["id"] = inode.Id
   864  	dataMap["path"] = inode.FullName()
   865  	if inode.CacheState == ST_DEAD {
   866  		dataMap["state"] = "dead"
   867  	} else if inode.CacheState == ST_CREATED {
   868  		dataMap["state"] = "created"
   869  	} else if inode.CacheState == ST_MODIFIED {
   870  		dataMap["state"] = "modified"
   871  	} else if inode.CacheState == ST_DELETED {
   872  		dataMap["state"] = "deleted"
   873  	}
   874  
   875  	dataMap["size"] = inode.Attributes.Size
   876  	dataMap["mtime"] = inode.Attributes.Mtime.Unix()
   877  	dataMap["ctime"] = inode.Attributes.Ctime.Unix()
   878  	if inode.Attributes.Uid != fs.flags.Uid {
   879  		dataMap["uid"] = inode.Attributes.Uid
   880  	}
   881  	if inode.Attributes.Gid != fs.flags.Gid {
   882  		dataMap["gid"] = inode.Attributes.Gid
   883  	}
   884  	if inode.Attributes.Rdev != 0 {
   885  		dataMap["rdev"] = inode.Attributes.Rdev
   886  	}
   887  	if inode.isDir() && inode.Attributes.Mode != (os.ModeDir | fs.flags.DirMode) ||
   888  		!inode.isDir() && inode.Attributes.Mode != fs.flags.FileMode {
   889  		dataMap["mode"] = fuseops.ConvertGolangMode(inode.Attributes.Mode)
   890  	}
   891  
   892  	dataMap["attrTime"] = inode.AttrTime.Unix()
   893  	dataMap["expireTime"] = inode.ExpireTime.Unix()
   894  	if inode.fileHandles != 0 {
   895  		dataMap["fileHandles"] = inode.fileHandles
   896  	}
   897  	if inode.oldParent != nil {
   898  		oldPath := inode.oldName
   899  		if inode.oldParent.Id != fuseops.RootInodeID {
   900  			oldPath = inode.oldParent.FullName()+"/"+oldPath
   901  		}
   902  		dataMap["oldPath"] = oldPath
   903  		if inode.renamingTo {
   904  			dataMap["renameStarted"] = true
   905  		}
   906  	}
   907  	if len(inode.userMetadata) != 0 {
   908  		dataMap["userMetadata"] = inode.userMetadata
   909  	}
   910  	if inode.userMetadataDirty != 0 {
   911  		dataMap["userMetadataDirty"] = inode.userMetadataDirty
   912  	}
   913  	dataMap["knownSize"] = inode.knownSize
   914  	dataMap["knownETag"] = inode.knownETag
   915  	dataMap["refcnt"] = inode.refcnt
   916  	if inode.pauseWriters != 0 {
   917  		dataMap["pauseWriters"] = inode.pauseWriters
   918  	}
   919  	if inode.forceFlush {
   920  		dataMap["forceFlush"] = true
   921  	}
   922  	if inode.IsFlushing != 0 {
   923  		dataMap["flushing"] = inode.IsFlushing
   924  	}
   925  	if inode.OnDisk {
   926  		dataMap["onDisk"] = inode.OnDisk
   927  	}
   928  	if inode.flushError != nil {
   929  		dataMap["flushError"] = inode.flushError.Error()
   930  		dataMap["flushErrorTime"] = inode.flushErrorTime.Unix()
   931  	}
   932  	if inode.readError != nil {
   933  		dataMap["readError"] = inode.readError.Error()
   934  	}
   935  
   936  	if withBuffers && len(inode.readRanges) > 0 {
   937  		var ranges [][]uint64
   938  		for _, r := range inode.readRanges {
   939  			fl := uint64(0)
   940  			if r.Flushing {
   941  				fl = 1
   942  			}
   943  			ranges = append(ranges, []uint64{fl, r.Offset, r.Size})
   944  		}
   945  		dataMap["readRanges"] = ranges
   946  	}
   947  	if withBuffers && inode.mpu != nil {
   948  		var mpu []string
   949  		for i := uint32(0); i < inode.mpu.NumParts; i++ {
   950  			mpu = append(mpu, NilStr(inode.mpu.Parts[i]))
   951  		}
   952  		dataMap["uploadId"] = *inode.mpu.UploadId
   953  		dataMap["uploadParts"] = mpu
   954  	}
   955  
   956  	if inode.isDir() {
   957  		dirData := make(map[string]interface{})
   958  		dirData["dirTime"] = inode.dir.DirTime.Unix()
   959  		if inode.dir.ModifiedChildren != 0 {
   960  			dirData["modifiedChildren"] = inode.dir.ModifiedChildren
   961  		}
   962  		if inode.dir.ImplicitDir {
   963  			dirData["implicit"] = true
   964  		}
   965  		if inode.dir.listMarker != "" {
   966  			dirData["listMarker"] = inode.dir.listMarker
   967  		}
   968  		if inode.dir.lastFromCloud != nil {
   969  			dirData["lastFromCloud"] = *inode.dir.lastFromCloud
   970  		}
   971  		if !inode.dir.listDone {
   972  			dirData["listing"] = true
   973  		}
   974  		if inode.dir.forgetDuringList {
   975  			dirData["forgetDuringList"] = true
   976  		}
   977  		if inode.dir.lastOpenDirIdx != 0 {
   978  			dirData["lastOpenDirIdx"] = inode.dir.lastOpenDirIdx
   979  		}
   980  		if inode.dir.seqOpenDirScore >= 2 {
   981  			dirData["seqOpenDir"] = true
   982  		}
   983  		if !inode.dir.refreshStartTime.IsZero() {
   984  			dirData["refreshStartTime"] = inode.dir.refreshStartTime.Unix()
   985  		}
   986  		if len(inode.dir.handles) > 0 {
   987  			dirData["dirHandles"] = len(inode.dir.handles)
   988  		}
   989  		if len(inode.dir.Gaps) > 0 {
   990  			var gaps []map[string]interface{}
   991  			for _, gap := range inode.dir.Gaps {
   992  				m := make(map[string]interface{})
   993  				m["start"] = gap.start
   994  				m["end"] = gap.end
   995  				m["loadTime"] = gap.loadTime
   996  			}
   997  			dirData["gaps"] = gaps
   998  		}
   999  		if len(inode.dir.DeletedChildren) > 0 {
  1000  			var deletedNames []string
  1001  			for key := range inode.dir.DeletedChildren {
  1002  				deletedNames = append(deletedNames, key)
  1003  			}
  1004  			dirData["deletedChildren"] = deletedNames
  1005  		}
  1006  		dataMap["dir"] = dirData
  1007  		for _, child := range inode.dir.Children {
  1008  			children = append(children, child)
  1009  		}
  1010  	}
  1011  
  1012  	dumpBuf, _ := json.Marshal(dataMap)
  1013  	dump := string(dumpBuf)
  1014  	if withBuffers && inode.buffers.Count() > 0 {
  1015  		b := inode.buffers.Dump(0, 0xffffffffffffffff)
  1016  		b = b[0:len(b)-1]
  1017  		dump += "\n"+b
  1018  	}
  1019  	log.Error(dump)
  1020  
  1021  	return children
  1022  }