github.com/djmaze/goofys@v0.24.2/internal/goofys.go (about)

     1  // Copyright 2015 - 2017 Ka-Hing Cheung
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package internal
    16  
    17  import (
    18  	. "github.com/djmaze/goofys/api/common"
    19  
    20  	"context"
    21  	"fmt"
    22  	"math/rand"
    23  	"net/url"
    24  	"runtime/debug"
    25  	"strings"
    26  	"sync"
    27  	"sync/atomic"
    28  	"syscall"
    29  	"time"
    30  
    31  	"github.com/aws/aws-sdk-go/aws/awserr"
    32  
    33  	"github.com/jacobsa/fuse"
    34  	"github.com/jacobsa/fuse/fuseops"
    35  	"github.com/jacobsa/fuse/fuseutil"
    36  
    37  	"net/http"
    38  
    39  	"github.com/sirupsen/logrus"
    40  )
    41  
// goofys is a Filey System written in Go. All the backend data is
// stored on S3 as is. It's a Filey System instead of a File System
// because it makes minimal effort at being POSIX
// compliant. In particular, things that are difficult to support on S3
// or would translate into more than one round-trip either fail
// (rename non-empty dir) or are faked (no per-file permission). goofys
// does not have an on-disk data cache, and its consistency model is
// close-to-open.
    50  
// Goofys holds the state of one mounted file system: the flags it was created
// with, the table of live inodes, and the tables of open directory and file
// handles. It embeds fuseutil.NotImplementedFileSystem and overrides the FUSE
// operations implemented in this file.
type Goofys struct {
	fuseutil.NotImplementedFileSystem
	// Bucket name; newGoofys strips any ":prefix" suffix before storing it.
	bucket string

	// Flags supplied by the caller of NewGoofys.
	flags *FlagStorage

	// Set to 0122 by newGoofys.
	umask uint32

	// gcsS3 is true when the backend's delegate is a *GCS3 (see newGoofys).
	// rootAttrs holds the root directory's size (4096) and its mtime, which is
	// the time the file system was constructed.
	gcsS3     bool
	rootAttrs InodeAttributes

	bufferPool *BufferPool

	// A lock protecting the state of the file system struct itself (distinct
	// from per-inode locks). Make sure to see the notes on lock ordering above.
	// NOTE(review): no lock-ordering notes are visible in this part of the
	// file; confirm where they live.
	mu sync.RWMutex

	// The next inode ID to hand out. We assume that this will never overflow,
	// since even if we were handing out inode IDs at 4 GHz, it would still take
	// over a century to do so.
	//
	// GUARDED_BY(mu)
	nextInodeID fuseops.InodeID

	// The collection of live inodes, keyed by inode ID. No ID less than
	// fuseops.RootInodeID is ever used.
	//
	// INVARIANT: For all keys k, fuseops.RootInodeID <= k < nextInodeID
	// INVARIANT: For all keys k, inodes[k].ID() == k
	// INVARIANT: inodes[fuseops.RootInodeID] is missing or of type inode.DirInode
	// INVARIANT: For all v, if IsDirName(v.Name()) then v is inode.DirInode
	//
	// GUARDED_BY(mu)
	inodes map[fuseops.InodeID]*Inode

	// nextHandleID is the next handle ID to hand out; it starts at 1 and is
	// shared by directory and file handles. dirHandles maps the IDs returned
	// by OpenDir to their open directory handles.
	nextHandleID fuseops.HandleID
	dirHandles   map[fuseops.HandleID]*DirHandle

	// Open file handles, keyed by the IDs returned by OpenFile and CreateFile.
	fileHandles map[fuseops.HandleID]*FileHandle

	// Tickets initialised by newGoofys with totals of 16 and 20 respectively.
	// NOTE(review): presumably these bound concurrent replicate/restore work;
	// their use is not visible in this part of the file.
	replicators *Ticket
	restorers   *Ticket

	// Number of inodes removed by ForgetInode; reported by SigUsr1.
	forgotCnt uint32
}
    96  
    97  var s3Log = GetLogger("s3")
    98  var log = GetLogger("main")
    99  var fuseLog = GetLogger("fuse")
   100  
   101  func NewBackend(bucket string, flags *FlagStorage) (cloud StorageBackend, err error) {
   102  	if flags.Backend == nil {
   103  		flags.Backend = (&S3Config{}).Init()
   104  	}
   105  
   106  	if config, ok := flags.Backend.(*AZBlobConfig); ok {
   107  		cloud, err = NewAZBlob(bucket, config)
   108  	} else if config, ok := flags.Backend.(*ADLv1Config); ok {
   109  		cloud, err = NewADLv1(bucket, flags, config)
   110  	} else if config, ok := flags.Backend.(*ADLv2Config); ok {
   111  		cloud, err = NewADLv2(bucket, flags, config)
   112  	} else if config, ok := flags.Backend.(*S3Config); ok {
   113  		if strings.HasSuffix(flags.Endpoint, "/storage.googleapis.com") {
   114  			cloud, err = NewGCS3(bucket, flags, config)
   115  		} else {
   116  			cloud, err = NewS3(bucket, flags, config)
   117  		}
   118  	} else if config, ok := flags.Backend.(*GCSConfig); ok {
   119  		cloud, err = NewGCS(bucket, config)
   120  	} else {
   121  		err = fmt.Errorf("Unknown backend config: %T", flags.Backend)
   122  	}
   123  
   124  	return
   125  }
   126  
// BucketSpec is the parsed form of a bucket argument, as produced by
// ParseBucketSpec.
type BucketSpec struct {
	// URL scheme of the argument, or "s3" when the argument is not a URL.
	Scheme string
	// Bucket name; for a URL with a user part this is "user@host".
	Bucket string
	// Key prefix: either empty, or without a leading "/" and ending in one "/".
	Prefix string
}
   132  
   133  func ParseBucketSpec(bucket string) (spec BucketSpec, err error) {
   134  	if strings.Index(bucket, "://") != -1 {
   135  		var u *url.URL
   136  		u, err = url.Parse(bucket)
   137  		if err != nil {
   138  			return
   139  		}
   140  
   141  		spec.Scheme = u.Scheme
   142  		spec.Bucket = u.Host
   143  		if u.User != nil {
   144  			// wasb url can be wasb://container@storage-end-point
   145  			// we want to return the entire thing as bucket
   146  			spec.Bucket = u.User.String() + "@" + u.Host
   147  		}
   148  		spec.Prefix = u.Path
   149  	} else {
   150  		spec.Scheme = "s3"
   151  
   152  		colon := strings.Index(bucket, ":")
   153  		if colon != -1 {
   154  			spec.Prefix = bucket[colon+1:]
   155  			spec.Bucket = bucket[0:colon]
   156  		} else {
   157  			spec.Bucket = bucket
   158  		}
   159  	}
   160  
   161  	spec.Prefix = strings.Trim(spec.Prefix, "/")
   162  	if spec.Prefix != "" {
   163  		spec.Prefix += "/"
   164  	}
   165  	return
   166  }
   167  
// NewGoofys creates a file system for the given bucket argument using
// NewBackend to construct the storage backend. It returns whatever newGoofys
// returns, which is nil when the backend cannot be set up or accessed.
func NewGoofys(ctx context.Context, bucket string, flags *FlagStorage) *Goofys {
	return newGoofys(ctx, bucket, flags, NewBackend)
}
   171  
   172  func newGoofys(ctx context.Context, bucket string, flags *FlagStorage,
   173  	newBackend func(string, *FlagStorage) (StorageBackend, error)) *Goofys {
   174  	// Set up the basic struct.
   175  	fs := &Goofys{
   176  		bucket: bucket,
   177  		flags:  flags,
   178  		umask:  0122,
   179  	}
   180  
   181  	var prefix string
   182  	colon := strings.Index(bucket, ":")
   183  	if colon != -1 {
   184  		prefix = bucket[colon+1:]
   185  		prefix = strings.Trim(prefix, "/")
   186  		if prefix != "" {
   187  			prefix += "/"
   188  		}
   189  
   190  		fs.bucket = bucket[0:colon]
   191  		bucket = fs.bucket
   192  	}
   193  
   194  	if flags.DebugS3 {
   195  		s3Log.Level = logrus.DebugLevel
   196  	}
   197  
   198  	cloud, err := newBackend(bucket, flags)
   199  	if err != nil {
   200  		log.Errorf("Unable to setup backend: %v", err)
   201  		return nil
   202  	}
   203  	_, fs.gcsS3 = cloud.Delegate().(*GCS3)
   204  
   205  	randomObjectName := prefix + (RandStringBytesMaskImprSrc(32))
   206  	err = cloud.Init(randomObjectName)
   207  	if err != nil {
   208  		log.Errorf("Unable to access '%v': %v", bucket, err)
   209  		return nil
   210  	}
   211  	go cloud.MultipartExpire(&MultipartExpireInput{})
   212  
   213  	now := time.Now()
   214  	fs.rootAttrs = InodeAttributes{
   215  		Size:  4096,
   216  		Mtime: now,
   217  	}
   218  
   219  	fs.bufferPool = BufferPool{}.Init()
   220  
   221  	fs.nextInodeID = fuseops.RootInodeID + 1
   222  	fs.inodes = make(map[fuseops.InodeID]*Inode)
   223  	root := NewInode(fs, nil, PString(""))
   224  	root.Id = fuseops.RootInodeID
   225  	root.ToDir()
   226  	root.dir.cloud = cloud
   227  	root.dir.mountPrefix = prefix
   228  	root.Attributes.Mtime = fs.rootAttrs.Mtime
   229  
   230  	fs.inodes[fuseops.RootInodeID] = root
   231  	fs.addDotAndDotDot(root)
   232  
   233  	fs.nextHandleID = 1
   234  	fs.dirHandles = make(map[fuseops.HandleID]*DirHandle)
   235  
   236  	fs.fileHandles = make(map[fuseops.HandleID]*FileHandle)
   237  
   238  	fs.replicators = Ticket{Total: 16}.Init()
   239  	fs.restorers = Ticket{Total: 20}.Init()
   240  
   241  	return fs
   242  }
   243  
   244  // from https://stackoverflow.com/questions/22892120/how-to-generate-a-random-string-of-a-fixed-length-in-golang
   245  func RandStringBytesMaskImprSrc(n int) string {
   246  	const letterBytes = "abcdefghijklmnopqrstuvwxyz0123456789"
   247  	const (
   248  		letterIdxBits = 6                    // 6 bits to represent a letter index
   249  		letterIdxMask = 1<<letterIdxBits - 1 // All 1-bits, as many as letterIdxBits
   250  		letterIdxMax  = 63 / letterIdxBits   // # of letter indices fitting in 63 bits
   251  	)
   252  	src := rand.NewSource(time.Now().UnixNano())
   253  	b := make([]byte, n)
   254  	// A src.Int63() generates 63 random bits, enough for letterIdxMax characters!
   255  	for i, cache, remain := n-1, src.Int63(), letterIdxMax; i >= 0; {
   256  		if remain == 0 {
   257  			cache, remain = src.Int63(), letterIdxMax
   258  		}
   259  		if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
   260  			b[i] = letterBytes[idx]
   261  			i--
   262  		}
   263  		cache >>= letterIdxBits
   264  		remain--
   265  	}
   266  
   267  	return string(b)
   268  }
   269  
   270  func (fs *Goofys) SigUsr1() {
   271  	fs.mu.RLock()
   272  
   273  	log.Infof("forgot %v inodes", fs.forgotCnt)
   274  	log.Infof("%v inodes", len(fs.inodes))
   275  	fs.mu.RUnlock()
   276  	debug.FreeOSMemory()
   277  }
   278  
   279  // Find the given inode. Panic if it doesn't exist.
   280  //
   281  // RLOCKS_REQUIRED(fs.mu)
   282  func (fs *Goofys) getInodeOrDie(id fuseops.InodeID) (inode *Inode) {
   283  	inode = fs.inodes[id]
   284  	if inode == nil {
   285  		panic(fmt.Sprintf("Unknown inode: %v", id))
   286  	}
   287  
   288  	return
   289  }
   290  
// Mount describes one additional backend to attach beneath the root of the
// file system (see Goofys.Mount and Goofys.MountAll).
type Mount struct {
	// Mount Point relative to goofys's root mount.
	name    string
	// Backend installed as the mount point directory's cloud.
	cloud   StorageBackend
	// Installed as the mount point directory's mountPrefix.
	prefix  string
	// Set once the mount has been applied; mounting again is then a no-op.
	mounted bool
}
   298  
// mount attaches b beneath mp. It walks b.name one path component at a time
// starting from mp, creating any missing intermediate directory inodes, and
// then installs b.cloud and b.prefix on the final component. If that final
// component already exists it must be a directory (otherwise this panics);
// its cached directory state is reset before the backend is swapped in.
// Calling mount on a Mount that is already marked mounted does nothing.
func (fs *Goofys) mount(mp *Inode, b *Mount) {
	if b.mounted {
		return
	}

	name := strings.Trim(b.name, "/")

	// create path for the mount. AttrTime is set to TIME_MAX so
	// they will never expire and be removed. But DirTime is not
	// so we will still consult the underlying cloud for listing
	// (which will then be merged with the cached result)

	for {
		idx := strings.Index(name, "/")
		if idx == -1 {
			// name is now the last path component: the mount point itself.
			break
		}
		dirName := name[0:idx]
		name = name[idx+1:]

		// Lock order here is the parent inode's lock first, then fs.mu.
		mp.mu.Lock()
		dirInode := mp.findChildUnlocked(dirName)
		if dirInode == nil {
			fs.mu.Lock()

			dirInode = NewInode(fs, mp, &dirName)
			dirInode.ToDir()
			dirInode.AttrTime = TIME_MAX

			fs.insertInode(mp, dirInode)
			fs.mu.Unlock()
		}
		mp.mu.Unlock()
		mp = dirInode
	}

	// mp is the parent of the mount point; it stays locked until return.
	mp.mu.Lock()
	defer mp.mu.Unlock()

	prev := mp.findChildUnlocked(name)
	if prev == nil {
		mountInode := NewInode(fs, mp, &name)
		mountInode.ToDir()
		mountInode.dir.cloud = b.cloud
		mountInode.dir.mountPrefix = b.prefix
		mountInode.AttrTime = TIME_MAX

		fs.mu.Lock()
		defer fs.mu.Unlock()

		fs.insertInode(mp, mountInode)
		prev = mountInode
	} else {
		if !prev.isDir() {
			panic(fmt.Sprintf("inode %v is not a directory", *prev.FullName()))
		}

		// This inode might have some cached data from a parent mount.
		// Clear this cache by resetting the DirTime.
		// Note: resetDirTimeRec should be called without holding the lock.
		prev.resetDirTimeRec()
		prev.mu.Lock()
		defer prev.mu.Unlock()
		prev.dir.cloud = b.cloud
		prev.dir.mountPrefix = b.prefix
		prev.AttrTime = TIME_MAX

	}
	fuseLog.Infof("mounted /%v", *prev.FullName())
	b.mounted = true
}
   370  
   371  func (fs *Goofys) MountAll(mounts []*Mount) {
   372  	fs.mu.RLock()
   373  	root := fs.getInodeOrDie(fuseops.RootInodeID)
   374  	fs.mu.RUnlock()
   375  
   376  	for _, m := range mounts {
   377  		fs.mount(root, m)
   378  	}
   379  }
   380  
   381  func (fs *Goofys) Mount(mount *Mount) {
   382  	fs.mu.RLock()
   383  	root := fs.getInodeOrDie(fuseops.RootInodeID)
   384  	fs.mu.RUnlock()
   385  	fs.mount(root, mount)
   386  }
   387  
   388  func (fs *Goofys) Unmount(mountPoint string) {
   389  	fs.mu.RLock()
   390  	mp := fs.getInodeOrDie(fuseops.RootInodeID)
   391  	fs.mu.RUnlock()
   392  
   393  	fuseLog.Infof("Attempting to unmount %v", mountPoint)
   394  	path := strings.Split(strings.Trim(mountPoint, "/"), "/")
   395  	for _, localName := range path {
   396  		dirInode := mp.findChild(localName)
   397  		if dirInode == nil || !dirInode.isDir() {
   398  			fuseLog.Errorf("Failed to find directory:%v while unmounting %v. "+
   399  				"Ignoring the unmount operation.", localName, mountPoint)
   400  			return
   401  		}
   402  		mp = dirInode
   403  	}
   404  	mp.ResetForUnmount()
   405  	return
   406  }
   407  
   408  func (fs *Goofys) StatFS(
   409  	ctx context.Context,
   410  	op *fuseops.StatFSOp) (err error) {
   411  
   412  	const BLOCK_SIZE = 4096
   413  	const TOTAL_SPACE = 1 * 1024 * 1024 * 1024 * 1024 * 1024 // 1PB
   414  	const TOTAL_BLOCKS = TOTAL_SPACE / BLOCK_SIZE
   415  	const INODES = 1 * 1000 * 1000 * 1000 // 1 billion
   416  	op.BlockSize = BLOCK_SIZE
   417  	op.Blocks = TOTAL_BLOCKS
   418  	op.BlocksFree = TOTAL_BLOCKS
   419  	op.BlocksAvailable = TOTAL_BLOCKS
   420  	op.IoSize = 1 * 1024 * 1024 // 1MB
   421  	op.Inodes = INODES
   422  	op.InodesFree = INODES
   423  	return
   424  }
   425  
   426  func (fs *Goofys) GetInodeAttributes(
   427  	ctx context.Context,
   428  	op *fuseops.GetInodeAttributesOp) (err error) {
   429  
   430  	fs.mu.RLock()
   431  	inode := fs.getInodeOrDie(op.Inode)
   432  	fs.mu.RUnlock()
   433  
   434  	attr, err := inode.GetAttributes()
   435  	if err == nil {
   436  		op.Attributes = *attr
   437  		op.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
   438  	}
   439  
   440  	return
   441  }
   442  
   443  func (fs *Goofys) GetXattr(ctx context.Context,
   444  	op *fuseops.GetXattrOp) (err error) {
   445  	fs.mu.RLock()
   446  	inode := fs.getInodeOrDie(op.Inode)
   447  	fs.mu.RUnlock()
   448  
   449  	value, err := inode.GetXattr(op.Name)
   450  	if err != nil {
   451  		return
   452  	}
   453  
   454  	op.BytesRead = len(value)
   455  
   456  	if len(op.Dst) != 0 {
   457  		if len(op.Dst) < op.BytesRead {
   458  			return syscall.ERANGE
   459  		}
   460  
   461  		copy(op.Dst, value)
   462  	}
   463  	return
   464  }
   465  
   466  func (fs *Goofys) ListXattr(ctx context.Context,
   467  	op *fuseops.ListXattrOp) (err error) {
   468  	var xattrs []string
   469  
   470  	fs.mu.RLock()
   471  	inode := fs.getInodeOrDie(op.Inode)
   472  	fs.mu.RUnlock()
   473  
   474  	// Do not try to get xattrs for the root inode
   475  	// Otherwise the path in the S3 request is empty, which is invalid
   476  	if inode.Id != 1 {
   477  		xattrs, err = inode.ListXattr()
   478  	}
   479  
   480  	ncopied := 0
   481  
   482  	for _, name := range xattrs {
   483  		buf := op.Dst[ncopied:]
   484  		nlen := len(name) + 1
   485  
   486  		if nlen <= len(buf) {
   487  			copy(buf, name)
   488  			ncopied += nlen
   489  			buf[nlen-1] = '\x00'
   490  		}
   491  
   492  		op.BytesRead += nlen
   493  	}
   494  
   495  	if len(op.Dst) != 0 && ncopied < op.BytesRead {
   496  		err = syscall.ERANGE
   497  	}
   498  
   499  	return
   500  }
   501  
   502  func (fs *Goofys) RemoveXattr(ctx context.Context,
   503  	op *fuseops.RemoveXattrOp) (err error) {
   504  	fs.mu.RLock()
   505  	inode := fs.getInodeOrDie(op.Inode)
   506  	fs.mu.RUnlock()
   507  
   508  	err = inode.RemoveXattr(op.Name)
   509  
   510  	return
   511  }
   512  
   513  func (fs *Goofys) SetXattr(ctx context.Context,
   514  	op *fuseops.SetXattrOp) (err error) {
   515  	fs.mu.RLock()
   516  	inode := fs.getInodeOrDie(op.Inode)
   517  	fs.mu.RUnlock()
   518  
   519  	err = inode.SetXattr(op.Name, op.Value, op.Flags)
   520  	return
   521  }
   522  
   523  func mapHttpError(status int) error {
   524  	switch status {
   525  	case 400:
   526  		return fuse.EINVAL
   527  	case 401:
   528  		return syscall.EACCES
   529  	case 403:
   530  		return syscall.EACCES
   531  	case 404:
   532  		return fuse.ENOENT
   533  	case 405:
   534  		return syscall.ENOTSUP
   535  	case http.StatusConflict:
   536  		return syscall.EINTR
   537  	case 429:
   538  		return syscall.EAGAIN
   539  	case 500:
   540  		return syscall.EAGAIN
   541  	default:
   542  		return nil
   543  	}
   544  }
   545  
   546  func mapAwsError(err error) error {
   547  	if err == nil {
   548  		return nil
   549  	}
   550  
   551  	if awsErr, ok := err.(awserr.Error); ok {
   552  		switch awsErr.Code() {
   553  		case "BucketRegionError":
   554  			// don't need to log anything, we should detect region after
   555  			return err
   556  		case "NoSuchBucket":
   557  			return syscall.ENXIO
   558  		case "BucketAlreadyOwnedByYou":
   559  			return fuse.EEXIST
   560  		}
   561  
   562  		if reqErr, ok := err.(awserr.RequestFailure); ok {
   563  			// A service error occurred
   564  			err = mapHttpError(reqErr.StatusCode())
   565  			if err != nil {
   566  				return err
   567  			} else {
   568  				s3Log.Errorf("http=%v %v s3=%v request=%v\n",
   569  					reqErr.StatusCode(), reqErr.Message(),
   570  					awsErr.Code(), reqErr.RequestID())
   571  				return reqErr
   572  			}
   573  		} else {
   574  			// Generic AWS Error with Code, Message, and original error (if any)
   575  			s3Log.Errorf("code=%v msg=%v, err=%v\n", awsErr.Code(), awsErr.Message(), awsErr.OrigErr())
   576  			return awsErr
   577  		}
   578  	} else {
   579  		return err
   580  	}
   581  }
   582  
   583  // note that this is NOT the same as url.PathEscape in golang 1.8,
   584  // as this preserves / and url.PathEscape converts / to %2F
   585  func pathEscape(path string) string {
   586  	u := url.URL{Path: path}
   587  	return u.EscapedPath()
   588  }
   589  
   590  func (fs *Goofys) allocateInodeId() (id fuseops.InodeID) {
   591  	id = fs.nextInodeID
   592  	fs.nextInodeID++
   593  	return
   594  }
   595  
   596  func expired(cache time.Time, ttl time.Duration) bool {
   597  	now := time.Now()
   598  	if cache.After(now) {
   599  		return false
   600  	}
   601  	return !cache.Add(ttl).After(now)
   602  }
   603  
// LookUpInode resolves op.Name inside the directory op.Parent and fills
// op.Entry with the child's ID, attributes and cache expirations.
//
// A cached child is used directly when its attributes have not expired, or
// when they have expired but the inode still has open file handles. Otherwise
// the parent is asked to look the name up again; the result either becomes a
// new inode or refreshes the cached one, flagging it for page-cache
// invalidation when its attributes or ETag changed.
//
// Returns the lookup error if the name cannot be resolved, except that a
// cached directory that now reports ENOENT is kept as-is. Panics if op.Parent
// is unknown.
func (fs *Goofys) LookUpInode(
	ctx context.Context,
	op *fuseops.LookUpInodeOp) (err error) {

	var inode *Inode
	// ok records whether the cached inode can be returned without a fresh lookup.
	var ok bool
	defer func() { fuseLog.Debugf("<-- LookUpInode %v %v %v", op.Parent, op.Name, err) }()

	fs.mu.RLock()
	parent := fs.getInodeOrDie(op.Parent)
	fs.mu.RUnlock()

	parent.mu.Lock()
	inode = parent.findChildUnlocked(op.Name)
	if inode != nil {
		ok = true
		// Take the reference up front; it is given back below if the
		// refresh lookup fails.
		inode.Ref()

		if expired(inode.AttrTime, fs.flags.StatCacheTTL) {
			ok = false
			if atomic.LoadInt32(&inode.fileHandles) != 0 {
				// we have an open file handle, object
				// in S3 may not represent the true
				// state of the file anyway, so just
				// return what we know which is
				// potentially more accurate
				ok = true
			} else {
				inode.logFuse("lookup expired")
			}
		}
	} else {
		ok = false
	}
	parent.mu.Unlock()

	if !ok {
		var newInode *Inode

		// The lookup runs without parent.mu held.
		newInode, err = parent.LookUp(op.Name)
		if err == fuse.ENOENT && inode != nil && inode.isDir() {
			// we may not be able to look up an implicit
			// dir if all the children are removed, so we
			// just pretend this dir is still around
			err = nil
		} else if err != nil {
			if inode != nil {
				// just kidding! pretend we didn't up the ref
				fs.mu.Lock()
				defer fs.mu.Unlock()

				stale := inode.DeRef(1)
				if stale {
					delete(fs.inodes, inode.Id)
					parent.removeChild(inode)
				}
			}
			return err
		}

		if inode == nil {
			parent.mu.Lock()
			// check again if it's there, could have been
			// added by another lookup or readdir
			inode = parent.findChildUnlocked(op.Name)
			if inode == nil {
				fs.mu.Lock()
				inode = newInode
				fs.insertInode(parent, inode)
				fs.mu.Unlock()
			}
			parent.mu.Unlock()
		} else {
			inode.mu.Lock()

			// newInode is nil when the ENOENT for a cached directory was
			// swallowed above; in that case only AttrTime is refreshed.
			if newInode != nil {
				// if only size changed, kernel seems to
				// automatically drop cache
				if !inode.Attributes.Equal(newInode.Attributes) {
					inode.logFuse("invalidate cache because attributes changed", inode.Attributes, newInode.Attributes)
					inode.invalidateCache = true
				} else if inode.knownETag != nil &&
					newInode.knownETag != nil &&
					*inode.knownETag != *newInode.knownETag {
					// if this is a new file (ie:
					// inode.knownETag is nil),
					// then prefer to read our own
					// write then reading updated
					// data
					inode.logFuse("invalidate cache because etag changed", *inode.knownETag, *newInode.knownETag)
					inode.invalidateCache = true
				}

				if newInode.Attributes.Mtime.IsZero() {
					// this can happen if it's an
					// implicit dir, use the last
					// known value
					newInode.Attributes.Mtime = inode.Attributes.Mtime
				}
				inode.Attributes = newInode.Attributes
				inode.knownETag = newInode.knownETag
			}
			inode.AttrTime = time.Now()

			inode.mu.Unlock()
		}
	}

	op.Entry.Child = inode.Id
	op.Entry.Attributes = inode.InflateAttributes()
	op.Entry.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
	op.Entry.EntryExpiration = time.Now().Add(fs.flags.TypeCacheTTL)

	return
}
   719  
   720  // LOCKS_REQUIRED(fs.mu)
   721  // LOCKS_REQUIRED(parent.mu)
   722  func (fs *Goofys) insertInode(parent *Inode, inode *Inode) {
   723  	addInode := false
   724  	if *inode.Name == "." {
   725  		inode.Id = parent.Id
   726  	} else if *inode.Name == ".." {
   727  		inode.Id = fuseops.InodeID(fuseops.RootInodeID)
   728  		if parent.Parent != nil {
   729  			inode.Id = parent.Parent.Id
   730  		}
   731  	} else {
   732  		if inode.Id != 0 {
   733  			panic(fmt.Sprintf("inode id is set: %v %v", *inode.Name, inode.Id))
   734  		}
   735  		inode.Id = fs.allocateInodeId()
   736  		addInode = true
   737  	}
   738  	parent.insertChildUnlocked(inode)
   739  	if addInode {
   740  		fs.inodes[inode.Id] = inode
   741  
   742  		// if we are inserting a new directory, also create
   743  		// the child . and ..
   744  		if inode.isDir() {
   745  			fs.addDotAndDotDot(inode)
   746  		}
   747  	}
   748  }
   749  
   750  func (fs *Goofys) addDotAndDotDot(dir *Inode) {
   751  	dot := NewInode(fs, dir, PString("."))
   752  	dot.ToDir()
   753  	dot.AttrTime = TIME_MAX
   754  	fs.insertInode(dir, dot)
   755  
   756  	dot = NewInode(fs, dir, PString(".."))
   757  	dot.ToDir()
   758  	dot.AttrTime = TIME_MAX
   759  	fs.insertInode(dir, dot)
   760  }
   761  
   762  func (fs *Goofys) ForgetInode(
   763  	ctx context.Context,
   764  	op *fuseops.ForgetInodeOp) (err error) {
   765  
   766  	fs.mu.RLock()
   767  	inode := fs.getInodeOrDie(op.Inode)
   768  	fs.mu.RUnlock()
   769  
   770  	if inode.Parent != nil {
   771  		inode.Parent.mu.Lock()
   772  		defer inode.Parent.mu.Unlock()
   773  	}
   774  	stale := inode.DeRef(op.N)
   775  
   776  	if stale {
   777  		fs.mu.Lock()
   778  		defer fs.mu.Unlock()
   779  
   780  		delete(fs.inodes, op.Inode)
   781  		fs.forgotCnt += 1
   782  
   783  		if inode.Parent != nil {
   784  			inode.Parent.removeChildUnlocked(inode)
   785  		}
   786  	}
   787  
   788  	return
   789  }
   790  
   791  func (fs *Goofys) OpenDir(
   792  	ctx context.Context,
   793  	op *fuseops.OpenDirOp) (err error) {
   794  	fs.mu.Lock()
   795  
   796  	handleID := fs.nextHandleID
   797  	fs.nextHandleID++
   798  
   799  	in := fs.getInodeOrDie(op.Inode)
   800  	fs.mu.Unlock()
   801  
   802  	// XXX/is this a dir?
   803  	dh := in.OpenDir()
   804  
   805  	fs.mu.Lock()
   806  	defer fs.mu.Unlock()
   807  
   808  	fs.dirHandles[handleID] = dh
   809  	op.Handle = handleID
   810  
   811  	return
   812  }
   813  
   814  func makeDirEntry(en *DirHandleEntry) fuseutil.Dirent {
   815  	return fuseutil.Dirent{
   816  		Name:   en.Name,
   817  		Type:   en.Type,
   818  		Inode:  en.Inode,
   819  		Offset: en.Offset,
   820  	}
   821  }
   822  
   823  func (fs *Goofys) ReadDir(
   824  	ctx context.Context,
   825  	op *fuseops.ReadDirOp) (err error) {
   826  
   827  	// Find the handle.
   828  	fs.mu.RLock()
   829  	dh := fs.dirHandles[op.Handle]
   830  	fs.mu.RUnlock()
   831  
   832  	if dh == nil {
   833  		panic(fmt.Sprintf("can't find dh=%v", op.Handle))
   834  	}
   835  
   836  	inode := dh.inode
   837  	inode.logFuse("ReadDir", op.Offset)
   838  
   839  	dh.mu.Lock()
   840  	defer dh.mu.Unlock()
   841  
   842  	for i := op.Offset; ; i++ {
   843  		e, err := dh.ReadDir(i)
   844  		if err != nil {
   845  			return err
   846  		}
   847  		if e == nil {
   848  			break
   849  		}
   850  
   851  		if e.Inode == 0 {
   852  			panic(fmt.Sprintf("unset inode %v", e.Name))
   853  		}
   854  
   855  		n := fuseutil.WriteDirent(op.Dst[op.BytesRead:], makeDirEntry(e))
   856  		if n == 0 {
   857  			break
   858  		}
   859  
   860  		dh.inode.logFuse("<-- ReadDir", e.Name, e.Offset)
   861  
   862  		op.BytesRead += n
   863  	}
   864  
   865  	return
   866  }
   867  
   868  func (fs *Goofys) ReleaseDirHandle(
   869  	ctx context.Context,
   870  	op *fuseops.ReleaseDirHandleOp) (err error) {
   871  
   872  	fs.mu.Lock()
   873  	defer fs.mu.Unlock()
   874  
   875  	dh := fs.dirHandles[op.Handle]
   876  	dh.CloseDir()
   877  
   878  	fuseLog.Debugln("ReleaseDirHandle", *dh.inode.FullName())
   879  
   880  	delete(fs.dirHandles, op.Handle)
   881  
   882  	return
   883  }
   884  
   885  func (fs *Goofys) OpenFile(
   886  	ctx context.Context,
   887  	op *fuseops.OpenFileOp) (err error) {
   888  	fs.mu.RLock()
   889  	in := fs.getInodeOrDie(op.Inode)
   890  	fs.mu.RUnlock()
   891  
   892  	fh, err := in.OpenFile(op.Metadata)
   893  	if err != nil {
   894  		return
   895  	}
   896  
   897  	fs.mu.Lock()
   898  
   899  	handleID := fs.nextHandleID
   900  	fs.nextHandleID++
   901  
   902  	fs.fileHandles[handleID] = fh
   903  	fs.mu.Unlock()
   904  
   905  	op.Handle = handleID
   906  
   907  	in.mu.Lock()
   908  	defer in.mu.Unlock()
   909  
   910  	// this flag appears to tell the kernel if this open should
   911  	// use the page cache or not. "use" here means:
   912  	//
   913  	// read will read from cache
   914  	// write will populate cache
   915  	//
   916  	// because we have one flag to control both behaviors, if an
   917  	// object is updated out-of-band and we need to invalidate
   918  	// cache, and we write to this object locally, subsequent read
   919  	// will not read from cache
   920  	//
   921  	// see tests TestReadNewFileWithExternalChangesFuse and
   922  	// TestReadMyOwnWrite*Fuse
   923  	op.KeepPageCache = !in.invalidateCache
   924  	fh.keepPageCache = op.KeepPageCache
   925  	in.invalidateCache = false
   926  
   927  	return
   928  }
   929  
   930  func (fs *Goofys) ReadFile(
   931  	ctx context.Context,
   932  	op *fuseops.ReadFileOp) (err error) {
   933  
   934  	fs.mu.RLock()
   935  	fh := fs.fileHandles[op.Handle]
   936  	fs.mu.RUnlock()
   937  
   938  	op.BytesRead, err = fh.ReadFile(op.Offset, op.Dst)
   939  
   940  	return
   941  }
   942  
   943  func (fs *Goofys) SyncFile(
   944  	ctx context.Context,
   945  	op *fuseops.SyncFileOp) (err error) {
   946  
   947  	// intentionally ignored, so that write()/sync()/write() works
   948  	// see https://github.com/djmaze/goofys/issues/154
   949  	return
   950  }
   951  
   952  func (fs *Goofys) FlushFile(
   953  	ctx context.Context,
   954  	op *fuseops.FlushFileOp) (err error) {
   955  
   956  	fs.mu.RLock()
   957  	fh := fs.fileHandles[op.Handle]
   958  	fs.mu.RUnlock()
   959  
   960  	// If the file handle has a tgid, then flush the file only if the
   961  	// incoming request's tgid matches the tgid in the file handle.
   962  	// This check helps us with scenarios like https://github.com/djmaze/goofys/issues/273
   963  	// Also see goofys_test.go:TestClientForkExec.
   964  	if fh.Tgid != nil {
   965  		tgid, err := GetTgid(op.Metadata.Pid)
   966  		if err != nil {
   967  			fh.inode.logFuse("<-- FlushFile",
   968  				fmt.Sprintf("Failed to retrieve tgid from op.Metadata.Pid. FlushFileOp:%#v, err:%v",
   969  					op, err))
   970  			return fuse.EIO
   971  		}
   972  		if *fh.Tgid != *tgid {
   973  			fh.inode.logFuse("<-- FlushFile",
   974  				"Operation ignored",
   975  				fmt.Sprintf("fh.Pid:%v != tgid:%v, op:%#v", *fh.Tgid, *tgid, op))
   976  			return nil
   977  		}
   978  	}
   979  
   980  	err = fh.FlushFile()
   981  	if err != nil {
   982  		// if we returned success from creat() earlier
   983  		// linux may think this file exists even when it doesn't,
   984  		// until TypeCacheTTL is over
   985  		// TODO: figure out a way to make the kernel forget this inode
   986  		// see TestWriteAnonymousFuse
   987  		fs.mu.RLock()
   988  		inode := fs.getInodeOrDie(op.Inode)
   989  		fs.mu.RUnlock()
   990  
   991  		if inode.KnownSize == nil {
   992  			inode.AttrTime = time.Time{}
   993  		}
   994  
   995  	}
   996  	fh.inode.logFuse("<-- FlushFile", err, op.Handle, op.Inode)
   997  	return
   998  }
   999  
  1000  func (fs *Goofys) ReleaseFileHandle(
  1001  	ctx context.Context,
  1002  	op *fuseops.ReleaseFileHandleOp) (err error) {
  1003  	fs.mu.Lock()
  1004  	defer fs.mu.Unlock()
  1005  	fh := fs.fileHandles[op.Handle]
  1006  	fh.Release()
  1007  
  1008  	fuseLog.Debugln("ReleaseFileHandle", *fh.inode.FullName(), op.Handle, fh.inode.Id)
  1009  
  1010  	delete(fs.fileHandles, op.Handle)
  1011  
  1012  	// try to compact heap
  1013  	//fs.bufferPool.MaybeGC()
  1014  	return
  1015  }
  1016  
  1017  func (fs *Goofys) CreateFile(
  1018  	ctx context.Context,
  1019  	op *fuseops.CreateFileOp) (err error) {
  1020  
  1021  	fs.mu.RLock()
  1022  	parent := fs.getInodeOrDie(op.Parent)
  1023  	fs.mu.RUnlock()
  1024  
  1025  	inode, fh := parent.Create(op.Name, op.Metadata)
  1026  
  1027  	parent.mu.Lock()
  1028  
  1029  	fs.mu.Lock()
  1030  	defer fs.mu.Unlock()
  1031  	fs.insertInode(parent, inode)
  1032  
  1033  	parent.mu.Unlock()
  1034  
  1035  	op.Entry.Child = inode.Id
  1036  	op.Entry.Attributes = inode.InflateAttributes()
  1037  	op.Entry.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
  1038  	op.Entry.EntryExpiration = time.Now().Add(fs.flags.TypeCacheTTL)
  1039  
  1040  	// Allocate a handle.
  1041  	handleID := fs.nextHandleID
  1042  	fs.nextHandleID++
  1043  
  1044  	fs.fileHandles[handleID] = fh
  1045  
  1046  	op.Handle = handleID
  1047  
  1048  	inode.logFuse("<-- CreateFile")
  1049  
  1050  	return
  1051  }
  1052  
  1053  func (fs *Goofys) MkDir(
  1054  	ctx context.Context,
  1055  	op *fuseops.MkDirOp) (err error) {
  1056  
  1057  	fs.mu.RLock()
  1058  	parent := fs.getInodeOrDie(op.Parent)
  1059  	fs.mu.RUnlock()
  1060  
  1061  	// ignore op.Mode for now
  1062  	inode, err := parent.MkDir(op.Name)
  1063  	if err != nil {
  1064  		return err
  1065  	}
  1066  
  1067  	parent.mu.Lock()
  1068  
  1069  	fs.mu.Lock()
  1070  	defer fs.mu.Unlock()
  1071  	fs.insertInode(parent, inode)
  1072  
  1073  	parent.mu.Unlock()
  1074  
  1075  	op.Entry.Child = inode.Id
  1076  	op.Entry.Attributes = inode.InflateAttributes()
  1077  	op.Entry.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
  1078  	op.Entry.EntryExpiration = time.Now().Add(fs.flags.TypeCacheTTL)
  1079  
  1080  	return
  1081  }
  1082  
  1083  func (fs *Goofys) RmDir(
  1084  	ctx context.Context,
  1085  	op *fuseops.RmDirOp) (err error) {
  1086  
  1087  	fs.mu.RLock()
  1088  	parent := fs.getInodeOrDie(op.Parent)
  1089  	fs.mu.RUnlock()
  1090  
  1091  	err = parent.RmDir(op.Name)
  1092  	parent.logFuse("<-- RmDir", op.Name, err)
  1093  	return
  1094  }
  1095  
  1096  func (fs *Goofys) SetInodeAttributes(
  1097  	ctx context.Context,
  1098  	op *fuseops.SetInodeAttributesOp) (err error) {
  1099  
  1100  	fs.mu.RLock()
  1101  	inode := fs.getInodeOrDie(op.Inode)
  1102  	fs.mu.RUnlock()
  1103  
  1104  	attr, err := inode.GetAttributes()
  1105  	if err == nil {
  1106  		op.Attributes = *attr
  1107  		op.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
  1108  	}
  1109  	return
  1110  }
  1111  
  1112  func (fs *Goofys) WriteFile(
  1113  	ctx context.Context,
  1114  	op *fuseops.WriteFileOp) (err error) {
  1115  
  1116  	fs.mu.RLock()
  1117  
  1118  	fh, ok := fs.fileHandles[op.Handle]
  1119  	if !ok {
  1120  		panic(fmt.Sprintf("WriteFile: can't find handle %v", op.Handle))
  1121  	}
  1122  	fs.mu.RUnlock()
  1123  
  1124  	err = fh.WriteFile(op.Offset, op.Data)
  1125  
  1126  	return
  1127  }
  1128  
  1129  func (fs *Goofys) Unlink(
  1130  	ctx context.Context,
  1131  	op *fuseops.UnlinkOp) (err error) {
  1132  
  1133  	fs.mu.RLock()
  1134  	parent := fs.getInodeOrDie(op.Parent)
  1135  	fs.mu.RUnlock()
  1136  
  1137  	err = parent.Unlink(op.Name)
  1138  	return
  1139  }
  1140  
  1141  // rename("from", "to") causes the kernel to send lookup of "from" and
  1142  // "to" prior to sending rename to us
  1143  func (fs *Goofys) Rename(
  1144  	ctx context.Context,
  1145  	op *fuseops.RenameOp) (err error) {
  1146  
  1147  	fs.mu.RLock()
  1148  	parent := fs.getInodeOrDie(op.OldParent)
  1149  	newParent := fs.getInodeOrDie(op.NewParent)
  1150  	fs.mu.RUnlock()
  1151  
  1152  	// XXX don't hold the lock the entire time
  1153  	if op.OldParent == op.NewParent {
  1154  		parent.mu.Lock()
  1155  		defer parent.mu.Unlock()
  1156  	} else {
  1157  		// lock ordering to prevent deadlock
  1158  		if op.OldParent < op.NewParent {
  1159  			parent.mu.Lock()
  1160  			newParent.mu.Lock()
  1161  		} else {
  1162  			newParent.mu.Lock()
  1163  			parent.mu.Lock()
  1164  		}
  1165  		defer parent.mu.Unlock()
  1166  		defer newParent.mu.Unlock()
  1167  	}
  1168  
  1169  	err = parent.Rename(op.OldName, newParent, op.NewName)
  1170  	if err != nil {
  1171  		if err == fuse.ENOENT {
  1172  			// if the source doesn't exist, it could be
  1173  			// because this is a new file and we haven't
  1174  			// flushed it yet, pretend that's ok because
  1175  			// when we flush we will handle the rename
  1176  			inode := parent.findChildUnlocked(op.OldName)
  1177  			if inode != nil && atomic.LoadInt32(&inode.fileHandles) != 0 {
  1178  				err = nil
  1179  			}
  1180  		}
  1181  	}
  1182  	if err == nil {
  1183  		inode := parent.findChildUnlocked(op.OldName)
  1184  		if inode != nil {
  1185  			inode.mu.Lock()
  1186  			defer inode.mu.Unlock()
  1187  
  1188  			parent.removeChildUnlocked(inode)
  1189  
  1190  			newNode := newParent.findChildUnlocked(op.NewName)
  1191  			if newNode != nil {
  1192  				// this file's been overwritten, it's
  1193  				// been detached but we can't delete
  1194  				// it just yet, because the kernel
  1195  				// will still send forget ops to us
  1196  				newParent.removeChildUnlocked(newNode)
  1197  				newNode.Parent = nil
  1198  			}
  1199  
  1200  			inode.Name = &op.NewName
  1201  			inode.Parent = newParent
  1202  			newParent.insertChildUnlocked(inode)
  1203  		}
  1204  	}
  1205  	return
  1206  }