github.com/shindo/goofys@v0.24.1-0.20210326210429-9e930f0b2d5c/internal/goofys.go

     1  // Copyright 2015 - 2017 Ka-Hing Cheung
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package internal
    16  
    17  import (
    18  	. "github.com/kahing/goofys/api/common"
    19  
    20  	"context"
    21  	"fmt"
    22  	"math/rand"
    23  	"net/url"
    24  	"runtime/debug"
    25  	"strings"
    26  	"sync"
    27  	"sync/atomic"
    28  	"syscall"
    29  	"time"
    30  
    31  	"github.com/aws/aws-sdk-go/aws/awserr"
    32  
    33  	"github.com/jacobsa/fuse"
    34  	"github.com/jacobsa/fuse/fuseops"
    35  	"github.com/jacobsa/fuse/fuseutil"
    36  
    37  	"github.com/sirupsen/logrus"
    38  	"net/http"
    39  )
    40  
     41  // goofys is a Filey System written in Go. All the backend data is
     42  // stored on S3 as is. It's a Filey System instead of a File System
     43  // because it makes minimal effort at being POSIX
     44  // compliant. In particular, things that are difficult to support on S3
     45  // or that would translate into more than one round trip either fail
     46  // (e.g. renaming a non-empty directory) or are faked (e.g. per-file
     47  // permissions). goofys does not have an on-disk data cache, and its
     48  // consistency model is close-to-open.
    49  
    50  type Goofys struct {
    51  	fuseutil.NotImplementedFileSystem
    52  	bucket string
    53  
    54  	flags *FlagStorage
    55  
    56  	umask uint32
    57  
    58  	gcs       bool
    59  	rootAttrs InodeAttributes
    60  
    61  	bufferPool *BufferPool
    62  
    63  	// A lock protecting the state of the file system struct itself (distinct
    64  	// from per-inode locks). Make sure to see the notes on lock ordering above.
    65  	mu sync.RWMutex
    66  
    67  	// The next inode ID to hand out. We assume that this will never overflow,
    68  	// since even if we were handing out inode IDs at 4 GHz, it would still take
    69  	// over a century to do so.
    70  	//
    71  	// GUARDED_BY(mu)
    72  	nextInodeID fuseops.InodeID
    73  
    74  	// The collection of live inodes, keyed by inode ID. No ID less than
    75  	// fuseops.RootInodeID is ever used.
    76  	//
    77  	// INVARIANT: For all keys k, fuseops.RootInodeID <= k < nextInodeID
    78  	// INVARIANT: For all keys k, inodes[k].ID() == k
    79  	// INVARIANT: inodes[fuseops.RootInodeID] is missing or of type inode.DirInode
    80  	// INVARIANT: For all v, if IsDirName(v.Name()) then v is inode.DirInode
    81  	//
    82  	// GUARDED_BY(mu)
    83  	inodes map[fuseops.InodeID]*Inode
    84  
    85  	nextHandleID fuseops.HandleID
    86  	dirHandles   map[fuseops.HandleID]*DirHandle
    87  
    88  	fileHandles map[fuseops.HandleID]*FileHandle
    89  
    90  	replicators *Ticket
    91  	restorers   *Ticket
    92  
    93  	forgotCnt uint32
    94  }
    95  
    96  var s3Log = GetLogger("s3")
    97  var log = GetLogger("main")
    98  var fuseLog = GetLogger("fuse")
    99  
   100  func NewBackend(bucket string, flags *FlagStorage) (cloud StorageBackend, err error) {
   101  	if flags.Backend == nil {
   102  		flags.Backend = (&S3Config{}).Init()
   103  	}
   104  
   105  	if config, ok := flags.Backend.(*AZBlobConfig); ok {
   106  		cloud, err = NewAZBlob(bucket, config)
   107  	} else if config, ok := flags.Backend.(*ADLv1Config); ok {
   108  		cloud, err = NewADLv1(bucket, flags, config)
   109  	} else if config, ok := flags.Backend.(*ADLv2Config); ok {
   110  		cloud, err = NewADLv2(bucket, flags, config)
   111  	} else if config, ok := flags.Backend.(*S3Config); ok {
   112  		if strings.HasSuffix(flags.Endpoint, "/storage.googleapis.com") {
   113  			cloud, err = NewGCS3(bucket, flags, config)
   114  		} else {
   115  			cloud, err = NewS3(bucket, flags, config)
   116  		}
   117  	} else {
   118  		err = fmt.Errorf("Unknown backend config: %T", flags.Backend)
   119  	}
   120  
   121  	return
   122  }
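
// The function below is an illustrative sketch and not part of the original
// source: it shows how NewBackend dispatches on the concrete type stored in
// flags.Backend and on the endpoint. The bucket name and endpoint are made-up
// placeholders; with a nil flags.Backend a default S3Config is used, and an
// endpoint ending in /storage.googleapis.com selects the GCS3 backend.
func newBackendExampleSketch() (StorageBackend, error) {
	flags := &FlagStorage{}
	flags.Endpoint = "https://storage.googleapis.com"
	return NewBackend("example-bucket", flags)
}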
   123  
   124  type BucketSpec struct {
   125  	Scheme string
   126  	Bucket string
   127  	Prefix string
   128  }
   129  
   130  func ParseBucketSpec(bucket string) (spec BucketSpec, err error) {
   131  	if strings.Index(bucket, "://") != -1 {
   132  		var u *url.URL
   133  		u, err = url.Parse(bucket)
   134  		if err != nil {
   135  			return
   136  		}
   137  
   138  		spec.Scheme = u.Scheme
   139  		spec.Bucket = u.Host
   140  		if u.User != nil {
    141  			// a wasb URL can be wasb://container@storage-end-point;
    142  			// we want to return the whole container@host part as the bucket
   143  			spec.Bucket = u.User.String() + "@" + u.Host
   144  		}
   145  		spec.Prefix = u.Path
   146  	} else {
   147  		spec.Scheme = "s3"
   148  
   149  		colon := strings.Index(bucket, ":")
   150  		if colon != -1 {
   151  			spec.Prefix = bucket[colon+1:]
   152  			spec.Bucket = bucket[0:colon]
   153  		} else {
   154  			spec.Bucket = bucket
   155  		}
   156  	}
   157  
   158  	spec.Prefix = strings.Trim(spec.Prefix, "/")
   159  	if spec.Prefix != "" {
   160  		spec.Prefix += "/"
   161  	}
   162  	return
   163  }
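
// The function below is an illustrative sketch and not part of the original
// source: it demonstrates the two spec forms ParseBucketSpec understands,
// using made-up bucket and container names.
func parseBucketSpecExampleSketch() {
	// plain "bucket:prefix" defaults to the s3 scheme
	s3Spec, _ := ParseBucketSpec("example-bucket:photos/2021")
	// s3Spec.Scheme == "s3", s3Spec.Bucket == "example-bucket",
	// s3Spec.Prefix == "photos/2021/"
	_ = s3Spec

	// a URL form keeps its scheme; for wasb the container@host part is the bucket
	wasbSpec, _ := ParseBucketSpec("wasb://container@example.blob.core.windows.net/prefix")
	// wasbSpec.Bucket == "container@example.blob.core.windows.net",
	// wasbSpec.Prefix == "prefix/"
	_ = wasbSpec
}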
   164  
   165  func NewGoofys(ctx context.Context, bucket string, flags *FlagStorage) *Goofys {
   166  	return newGoofys(ctx, bucket, flags, NewBackend)
   167  }
   168  
   169  func newGoofys(ctx context.Context, bucket string, flags *FlagStorage,
   170  	newBackend func(string, *FlagStorage) (StorageBackend, error)) *Goofys {
   171  	// Set up the basic struct.
   172  	fs := &Goofys{
   173  		bucket: bucket,
   174  		flags:  flags,
   175  		umask:  0122,
   176  	}
   177  
   178  	var prefix string
   179  	colon := strings.Index(bucket, ":")
   180  	if colon != -1 {
   181  		prefix = bucket[colon+1:]
   182  		prefix = strings.Trim(prefix, "/")
   183  		if prefix != "" {
   184  			prefix += "/"
   185  		}
   186  
   187  		fs.bucket = bucket[0:colon]
   188  		bucket = fs.bucket
   189  	}
   190  
   191  	if flags.DebugS3 {
   192  		s3Log.Level = logrus.DebugLevel
   193  	}
   194  
   195  	cloud, err := newBackend(bucket, flags)
   196  	if err != nil {
   197  		log.Errorf("Unable to setup backend: %v", err)
   198  		return nil
   199  	}
   200  	_, fs.gcs = cloud.Delegate().(*GCS3)
   201  
   202  	randomObjectName := prefix + (RandStringBytesMaskImprSrc(32))
   203  	err = cloud.Init(randomObjectName)
   204  	if err != nil {
   205  		log.Errorf("Unable to access '%v': %v", bucket, err)
   206  		return nil
   207  	}
   208  	go cloud.MultipartExpire(&MultipartExpireInput{})
   209  
   210  	now := time.Now()
   211  	fs.rootAttrs = InodeAttributes{
   212  		Size:  4096,
   213  		Mtime: now,
   214  	}
   215  
   216  	fs.bufferPool = BufferPool{}.Init()
   217  
   218  	fs.nextInodeID = fuseops.RootInodeID + 1
   219  	fs.inodes = make(map[fuseops.InodeID]*Inode)
   220  	root := NewInode(fs, nil, PString(""))
   221  	root.Id = fuseops.RootInodeID
   222  	root.ToDir()
   223  	root.dir.cloud = cloud
   224  	root.dir.mountPrefix = prefix
   225  	root.Attributes.Mtime = fs.rootAttrs.Mtime
   226  
   227  	fs.inodes[fuseops.RootInodeID] = root
   228  	fs.addDotAndDotDot(root)
   229  
   230  	fs.nextHandleID = 1
   231  	fs.dirHandles = make(map[fuseops.HandleID]*DirHandle)
   232  
   233  	fs.fileHandles = make(map[fuseops.HandleID]*FileHandle)
   234  
   235  	fs.replicators = Ticket{Total: 16}.Init()
   236  	fs.restorers = Ticket{Total: 20}.Init()
   237  
   238  	return fs
   239  }
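
// The function below is an illustrative sketch and not part of the original
// source: callers normally construct the file system through NewGoofys with
// parsed flags. The "bucket:prefix" argument (a made-up placeholder here)
// restricts the mount to that prefix; NewGoofys returns nil if the backend
// cannot be set up or the bucket cannot be accessed.
func newGoofysExampleSketch(ctx context.Context, flags *FlagStorage) *Goofys {
	return NewGoofys(ctx, "example-bucket:backups", flags)
}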
   240  
   241  // from https://stackoverflow.com/questions/22892120/how-to-generate-a-random-string-of-a-fixed-length-in-golang
   242  func RandStringBytesMaskImprSrc(n int) string {
   243  	const letterBytes = "abcdefghijklmnopqrstuvwxyz0123456789"
   244  	const (
   245  		letterIdxBits = 6                    // 6 bits to represent a letter index
   246  		letterIdxMask = 1<<letterIdxBits - 1 // All 1-bits, as many as letterIdxBits
   247  		letterIdxMax  = 63 / letterIdxBits   // # of letter indices fitting in 63 bits
   248  	)
   249  	src := rand.NewSource(time.Now().UnixNano())
   250  	b := make([]byte, n)
   251  	// A src.Int63() generates 63 random bits, enough for letterIdxMax characters!
   252  	for i, cache, remain := n-1, src.Int63(), letterIdxMax; i >= 0; {
   253  		if remain == 0 {
   254  			cache, remain = src.Int63(), letterIdxMax
   255  		}
   256  		if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
   257  			b[i] = letterBytes[idx]
   258  			i--
   259  		}
   260  		cache >>= letterIdxBits
   261  		remain--
   262  	}
   263  
   264  	return string(b)
   265  }
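
// The function below is an illustrative sketch and not part of the original
// source: newGoofys uses the same pattern to probe bucket access with a
// random 32-character object name under the mount prefix, so the probe never
// collides with real data.
func randomObjectNameSketch(prefix string) string {
	return prefix + RandStringBytesMaskImprSrc(32)
}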
   266  
   267  func (fs *Goofys) SigUsr1() {
   268  	fs.mu.RLock()
   269  
   270  	log.Infof("forgot %v inodes", fs.forgotCnt)
   271  	log.Infof("%v inodes", len(fs.inodes))
   272  	fs.mu.RUnlock()
   273  	debug.FreeOSMemory()
   274  }
   275  
   276  // Find the given inode. Panic if it doesn't exist.
   277  //
   278  // RLOCKS_REQUIRED(fs.mu)
   279  func (fs *Goofys) getInodeOrDie(id fuseops.InodeID) (inode *Inode) {
   280  	inode = fs.inodes[id]
   281  	if inode == nil {
   282  		panic(fmt.Sprintf("Unknown inode: %v", id))
   283  	}
   284  
   285  	return
   286  }
   287  
   288  type Mount struct {
   289  	// Mount Point relative to goofys's root mount.
   290  	name    string
   291  	cloud   StorageBackend
   292  	prefix  string
   293  	mounted bool
   294  }
   295  
   296  func (fs *Goofys) mount(mp *Inode, b *Mount) {
   297  	if b.mounted {
   298  		return
   299  	}
   300  
   301  	name := strings.Trim(b.name, "/")
   302  
    303  	// create the path for the mount. AttrTime is set to TIME_MAX so
    304  	// these inodes will never expire and be removed. But DirTime is not,
    305  	// so we will still consult the underlying cloud for listings
    306  	// (which are then merged with the cached result)
   307  
   308  	for {
   309  		idx := strings.Index(name, "/")
   310  		if idx == -1 {
   311  			break
   312  		}
   313  		dirName := name[0:idx]
   314  		name = name[idx+1:]
   315  
   316  		mp.mu.Lock()
   317  		dirInode := mp.findChildUnlocked(dirName)
   318  		if dirInode == nil {
   319  			fs.mu.Lock()
   320  
   321  			dirInode = NewInode(fs, mp, &dirName)
   322  			dirInode.ToDir()
   323  			dirInode.AttrTime = TIME_MAX
   324  
   325  			fs.insertInode(mp, dirInode)
   326  			fs.mu.Unlock()
   327  		}
   328  		mp.mu.Unlock()
   329  		mp = dirInode
   330  	}
   331  
   332  	mp.mu.Lock()
   333  	defer mp.mu.Unlock()
   334  
   335  	prev := mp.findChildUnlocked(name)
   336  	if prev == nil {
   337  		mountInode := NewInode(fs, mp, &name)
   338  		mountInode.ToDir()
   339  		mountInode.dir.cloud = b.cloud
   340  		mountInode.dir.mountPrefix = b.prefix
   341  		mountInode.AttrTime = TIME_MAX
   342  
   343  		fs.mu.Lock()
   344  		defer fs.mu.Unlock()
   345  
   346  		fs.insertInode(mp, mountInode)
   347  		prev = mountInode
   348  	} else {
   349  		if !prev.isDir() {
   350  			panic(fmt.Sprintf("inode %v is not a directory", *prev.FullName()))
   351  		}
   352  
   353  		// This inode might have some cached data from a parent mount.
   354  		// Clear this cache by resetting the DirTime.
   355  		// Note: resetDirTimeRec should be called without holding the lock.
   356  		prev.resetDirTimeRec()
   357  		prev.mu.Lock()
   358  		defer prev.mu.Unlock()
   359  		prev.dir.cloud = b.cloud
   360  		prev.dir.mountPrefix = b.prefix
   361  		prev.AttrTime = TIME_MAX
   362  
   363  	}
   364  	fuseLog.Infof("mounted /%v", *prev.FullName())
   365  	b.mounted = true
   366  }
   367  
   368  func (fs *Goofys) MountAll(mounts []*Mount) {
   369  	fs.mu.RLock()
   370  	root := fs.getInodeOrDie(fuseops.RootInodeID)
   371  	fs.mu.RUnlock()
   372  
   373  	for _, m := range mounts {
   374  		fs.mount(root, m)
   375  	}
   376  }
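
// The function below is an illustrative sketch and not part of the original
// source: each Mount grafts another storage backend under a path relative to
// the root. The mount name, prefix, and backend here are made-up
// placeholders.
func mountAllExampleSketch(fs *Goofys, extra StorageBackend) {
	fs.MountAll([]*Mount{
		{name: "shared/archive", cloud: extra, prefix: "archive/"},
	})
}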
   377  
   378  func (fs *Goofys) Mount(mount *Mount) {
   379  	fs.mu.RLock()
   380  	root := fs.getInodeOrDie(fuseops.RootInodeID)
   381  	fs.mu.RUnlock()
   382  	fs.mount(root, mount)
   383  }
   384  
   385  func (fs *Goofys) Unmount(mountPoint string) {
   386  	fs.mu.RLock()
   387  	mp := fs.getInodeOrDie(fuseops.RootInodeID)
   388  	fs.mu.RUnlock()
   389  
   390  	fuseLog.Infof("Attempting to unmount %v", mountPoint)
   391  	path := strings.Split(strings.Trim(mountPoint, "/"), "/")
   392  	for _, localName := range path {
   393  		dirInode := mp.findChild(localName)
   394  		if dirInode == nil || !dirInode.isDir() {
   395  			fuseLog.Errorf("Failed to find directory:%v while unmounting %v. "+
   396  				"Ignoring the unmount operation.", localName, mountPoint)
   397  			return
   398  		}
   399  		mp = dirInode
   400  	}
   401  	mp.ResetForUnmount()
   402  	return
   403  }
   404  
   405  func (fs *Goofys) StatFS(
   406  	ctx context.Context,
   407  	op *fuseops.StatFSOp) (err error) {
   408  
   409  	const BLOCK_SIZE = 4096
   410  	const TOTAL_SPACE = 1 * 1024 * 1024 * 1024 * 1024 * 1024 // 1PB
   411  	const TOTAL_BLOCKS = TOTAL_SPACE / BLOCK_SIZE
   412  	const INODES = 1 * 1000 * 1000 * 1000 // 1 billion
   413  	op.BlockSize = BLOCK_SIZE
   414  	op.Blocks = TOTAL_BLOCKS
   415  	op.BlocksFree = TOTAL_BLOCKS
   416  	op.BlocksAvailable = TOTAL_BLOCKS
   417  	op.IoSize = 1 * 1024 * 1024 // 1MB
   418  	op.Inodes = INODES
   419  	op.InodesFree = INODES
   420  	return
   421  }
   422  
   423  func (fs *Goofys) GetInodeAttributes(
   424  	ctx context.Context,
   425  	op *fuseops.GetInodeAttributesOp) (err error) {
   426  
   427  	fs.mu.RLock()
   428  	inode := fs.getInodeOrDie(op.Inode)
   429  	fs.mu.RUnlock()
   430  
   431  	attr, err := inode.GetAttributes()
   432  	if err == nil {
   433  		op.Attributes = *attr
   434  		op.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
   435  	}
   436  
   437  	return
   438  }
   439  
   440  func (fs *Goofys) GetXattr(ctx context.Context,
   441  	op *fuseops.GetXattrOp) (err error) {
   442  	fs.mu.RLock()
   443  	inode := fs.getInodeOrDie(op.Inode)
   444  	fs.mu.RUnlock()
   445  
   446  	value, err := inode.GetXattr(op.Name)
   447  	if err != nil {
   448  		return
   449  	}
   450  
   451  	op.BytesRead = len(value)
   452  
   453  	if len(op.Dst) != 0 {
   454  		if len(op.Dst) < op.BytesRead {
   455  			return syscall.ERANGE
   456  		}
   457  
   458  		copy(op.Dst, value)
   459  	}
   460  	return
   461  }
   462  
   463  func (fs *Goofys) ListXattr(ctx context.Context,
   464  	op *fuseops.ListXattrOp) (err error) {
   465  	fs.mu.RLock()
   466  	inode := fs.getInodeOrDie(op.Inode)
   467  	fs.mu.RUnlock()
   468  
   469  	xattrs, err := inode.ListXattr()
   470  
   471  	ncopied := 0
   472  
   473  	for _, name := range xattrs {
   474  		buf := op.Dst[ncopied:]
   475  		nlen := len(name) + 1
   476  
   477  		if nlen <= len(buf) {
   478  			copy(buf, name)
   479  			ncopied += nlen
   480  			buf[nlen-1] = '\x00'
   481  		}
   482  
   483  		op.BytesRead += nlen
   484  	}
   485  
   486  	if len(op.Dst) != 0 && ncopied < op.BytesRead {
   487  		err = syscall.ERANGE
   488  	}
   489  
   490  	return
   491  }
   492  
   493  func (fs *Goofys) RemoveXattr(ctx context.Context,
   494  	op *fuseops.RemoveXattrOp) (err error) {
   495  	fs.mu.RLock()
   496  	inode := fs.getInodeOrDie(op.Inode)
   497  	fs.mu.RUnlock()
   498  
   499  	err = inode.RemoveXattr(op.Name)
   500  
   501  	return
   502  }
   503  
   504  func (fs *Goofys) SetXattr(ctx context.Context,
   505  	op *fuseops.SetXattrOp) (err error) {
   506  	fs.mu.RLock()
   507  	inode := fs.getInodeOrDie(op.Inode)
   508  	fs.mu.RUnlock()
   509  
   510  	err = inode.SetXattr(op.Name, op.Value, op.Flags)
   511  	return
   512  }
   513  
   514  func mapHttpError(status int) error {
   515  	switch status {
   516  	case 400:
   517  		return fuse.EINVAL
   518  	case 401:
   519  		return syscall.EACCES
   520  	case 403:
   521  		return syscall.EACCES
   522  	case 404:
   523  		return fuse.ENOENT
   524  	case 405:
   525  		return syscall.ENOTSUP
   526  	case http.StatusConflict:
   527  		return syscall.EINTR
   528  	case 429:
   529  		return syscall.EAGAIN
   530  	case 500:
   531  		return syscall.EAGAIN
   532  	default:
   533  		return nil
   534  	}
   535  }
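
// The function below is an illustrative sketch and not part of the original
// source: statuses that mapHttpError does not recognize map to nil so the
// caller can fall back to logging and returning the raw request failure.
func mapHttpErrorExampleSketch() {
	notFound := mapHttpError(404)  // fuse.ENOENT
	throttled := mapHttpError(429) // syscall.EAGAIN
	unknown := mapHttpError(418)   // nil: unmapped, the caller keeps the original error
	_, _, _ = notFound, throttled, unknown
}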
   536  
   537  func mapAwsError(err error) error {
   538  	if err == nil {
   539  		return nil
   540  	}
   541  
   542  	if awsErr, ok := err.(awserr.Error); ok {
   543  		switch awsErr.Code() {
   544  		case "BucketRegionError":
    545  			// no need to log anything; we should detect the region afterwards
   546  			return err
   547  		case "NoSuchBucket":
   548  			return syscall.ENXIO
   549  		case "BucketAlreadyOwnedByYou":
   550  			return fuse.EEXIST
   551  		}
   552  
   553  		if reqErr, ok := err.(awserr.RequestFailure); ok {
   554  			// A service error occurred
   555  			err = mapHttpError(reqErr.StatusCode())
   556  			if err != nil {
   557  				return err
   558  			} else {
   559  				s3Log.Errorf("http=%v %v s3=%v request=%v\n",
   560  					reqErr.StatusCode(), reqErr.Message(),
   561  					awsErr.Code(), reqErr.RequestID())
   562  				return reqErr
   563  			}
   564  		} else {
   565  			// Generic AWS Error with Code, Message, and original error (if any)
   566  			s3Log.Errorf("code=%v msg=%v, err=%v\n", awsErr.Code(), awsErr.Message(), awsErr.OrigErr())
   567  			return awsErr
   568  		}
   569  	} else {
   570  		return err
   571  	}
   572  }
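
// The function below is an illustrative sketch and not part of the original
// source: awserr.New is the aws-sdk-go constructor for coded errors, used
// here only to show how a NoSuchBucket failure surfaces as a syscall errno.
func mapAwsErrorExampleSketch() error {
	err := awserr.New("NoSuchBucket", "the specified bucket does not exist", nil)
	return mapAwsError(err) // syscall.ENXIO
}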
   573  
   574  // note that this is NOT the same as url.PathEscape in golang 1.8,
   575  // as this preserves / and url.PathEscape converts / to %2F
   576  func pathEscape(path string) string {
   577  	u := url.URL{Path: path}
   578  	return u.EscapedPath()
   579  }
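
// The function below is an illustrative sketch and not part of the original
// source: it contrasts pathEscape with url.PathEscape for a key containing
// both a slash and a space.
func pathEscapeExampleSketch() (string, string) {
	kept := pathEscape("dir/file name")          // "dir/file%20name": the slash survives
	converted := url.PathEscape("dir/file name") // "dir%2Ffile%20name": the slash is escaped
	return kept, converted
}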
   580  
   581  func (fs *Goofys) allocateInodeId() (id fuseops.InodeID) {
   582  	id = fs.nextInodeID
   583  	fs.nextInodeID++
   584  	return
   585  }
   586  
   587  func expired(cache time.Time, ttl time.Duration) bool {
   588  	now := time.Now()
   589  	if cache.After(now) {
   590  		return false
   591  	}
   592  	return !cache.Add(ttl).After(now)
   593  }
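
// The function below is an illustrative sketch and not part of the original
// source: a cache timestamp in the future (e.g. TIME_MAX on mount points)
// never counts as expired, while an old timestamp expires once a full TTL has
// elapsed since it was recorded.
func expiredExampleSketch() (bool, bool) {
	future := expired(time.Now().Add(time.Hour), time.Minute)     // false: cache time is ahead of now
	stale := expired(time.Now().Add(-2*time.Minute), time.Minute) // true: the 1m TTL ran out 1m ago
	return future, stale
}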
   594  
   595  func (fs *Goofys) LookUpInode(
   596  	ctx context.Context,
   597  	op *fuseops.LookUpInodeOp) (err error) {
   598  
   599  	var inode *Inode
   600  	var ok bool
   601  	defer func() { fuseLog.Debugf("<-- LookUpInode %v %v %v", op.Parent, op.Name, err) }()
   602  
   603  	fs.mu.RLock()
   604  	parent := fs.getInodeOrDie(op.Parent)
   605  	fs.mu.RUnlock()
   606  
   607  	parent.mu.Lock()
   608  	inode = parent.findChildUnlocked(op.Name)
   609  	if inode != nil {
   610  		ok = true
   611  		inode.Ref()
   612  
   613  		if expired(inode.AttrTime, fs.flags.StatCacheTTL) {
   614  			ok = false
   615  			if atomic.LoadInt32(&inode.fileHandles) != 0 {
    616  				// we have an open file handle; the object
    617  				// in S3 may not represent the true
    618  				// state of the file anyway, so just
    619  				// return what we know, which is
    620  				// potentially more accurate
   621  				ok = true
   622  			} else {
   623  				inode.logFuse("lookup expired")
   624  			}
   625  		}
   626  	} else {
   627  		ok = false
   628  	}
   629  	parent.mu.Unlock()
   630  
   631  	if !ok {
   632  		var newInode *Inode
   633  
   634  		newInode, err = parent.LookUp(op.Name)
   635  		if err == fuse.ENOENT && inode != nil && inode.isDir() {
   636  			// we may not be able to look up an implicit
   637  			// dir if all the children are removed, so we
   638  			// just pretend this dir is still around
   639  			err = nil
   640  		} else if err != nil {
   641  			if inode != nil {
   642  				// just kidding! pretend we didn't up the ref
   643  				fs.mu.Lock()
   644  				defer fs.mu.Unlock()
   645  
   646  				stale := inode.DeRef(1)
   647  				if stale {
   648  					delete(fs.inodes, inode.Id)
   649  					parent.removeChild(inode)
   650  				}
   651  			}
   652  			return err
   653  		}
   654  
   655  		if inode == nil {
   656  			parent.mu.Lock()
   657  			// check again if it's there, could have been
   658  			// added by another lookup or readdir
   659  			inode = parent.findChildUnlocked(op.Name)
   660  			if inode == nil {
   661  				fs.mu.Lock()
   662  				inode = newInode
   663  				fs.insertInode(parent, inode)
   664  				fs.mu.Unlock()
   665  			}
   666  			parent.mu.Unlock()
   667  		} else {
   668  			inode.mu.Lock()
   669  
   670  			if newInode != nil {
    671  				// if only the size changed, the kernel seems to
    672  				// drop the cache automatically
   673  				if !inode.Attributes.Equal(newInode.Attributes) {
   674  					inode.logFuse("invalidate cache because attributes changed", inode.Attributes, newInode.Attributes)
   675  					inode.invalidateCache = true
   676  				} else if inode.knownETag != nil &&
   677  					newInode.knownETag != nil &&
   678  					*inode.knownETag != *newInode.knownETag {
    679  					// if this is a new file (i.e.
    680  					// inode.knownETag is nil),
    681  					// then prefer reading our own
    682  					// write over reading updated
    683  					// data
   684  					inode.logFuse("invalidate cache because etag changed", *inode.knownETag, *newInode.knownETag)
   685  					inode.invalidateCache = true
   686  				}
   687  
   688  				if newInode.Attributes.Mtime.IsZero() {
   689  					// this can happen if it's an
   690  					// implicit dir, use the last
   691  					// known value
   692  					newInode.Attributes.Mtime = inode.Attributes.Mtime
   693  				}
   694  				inode.Attributes = newInode.Attributes
   695  				inode.knownETag = newInode.knownETag
   696  			}
   697  			inode.AttrTime = time.Now()
   698  
   699  			inode.mu.Unlock()
   700  		}
   701  	}
   702  
   703  	op.Entry.Child = inode.Id
   704  	op.Entry.Attributes = inode.InflateAttributes()
   705  	op.Entry.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
   706  	op.Entry.EntryExpiration = time.Now().Add(fs.flags.TypeCacheTTL)
   707  
   708  	return
   709  }
   710  
   711  // LOCKS_REQUIRED(fs.mu)
   712  // LOCKS_REQUIRED(parent.mu)
   713  func (fs *Goofys) insertInode(parent *Inode, inode *Inode) {
   714  	addInode := false
   715  	if *inode.Name == "." {
   716  		inode.Id = parent.Id
   717  	} else if *inode.Name == ".." {
   718  		inode.Id = fuseops.InodeID(fuseops.RootInodeID)
   719  		if parent.Parent != nil {
   720  			inode.Id = parent.Parent.Id
   721  		}
   722  	} else {
   723  		if inode.Id != 0 {
   724  			panic(fmt.Sprintf("inode id is set: %v %v", *inode.Name, inode.Id))
   725  		}
   726  		inode.Id = fs.allocateInodeId()
   727  		addInode = true
   728  	}
   729  	parent.insertChildUnlocked(inode)
   730  	if addInode {
   731  		fs.inodes[inode.Id] = inode
   732  
   733  		// if we are inserting a new directory, also create
   734  		// the child . and ..
   735  		if inode.isDir() {
   736  			fs.addDotAndDotDot(inode)
   737  		}
   738  	}
   739  }
   740  
   741  func (fs *Goofys) addDotAndDotDot(dir *Inode) {
   742  	dot := NewInode(fs, dir, PString("."))
   743  	dot.ToDir()
   744  	dot.AttrTime = TIME_MAX
   745  	fs.insertInode(dir, dot)
   746  
   747  	dot = NewInode(fs, dir, PString(".."))
   748  	dot.ToDir()
   749  	dot.AttrTime = TIME_MAX
   750  	fs.insertInode(dir, dot)
   751  }
   752  
   753  func (fs *Goofys) ForgetInode(
   754  	ctx context.Context,
   755  	op *fuseops.ForgetInodeOp) (err error) {
   756  
   757  	fs.mu.RLock()
   758  	inode := fs.getInodeOrDie(op.Inode)
   759  	fs.mu.RUnlock()
   760  
   761  	if inode.Parent != nil {
   762  		inode.Parent.mu.Lock()
   763  		defer inode.Parent.mu.Unlock()
   764  	}
   765  	stale := inode.DeRef(op.N)
   766  
   767  	if stale {
   768  		fs.mu.Lock()
   769  		defer fs.mu.Unlock()
   770  
   771  		delete(fs.inodes, op.Inode)
   772  		fs.forgotCnt += 1
   773  
   774  		if inode.Parent != nil {
   775  			inode.Parent.removeChildUnlocked(inode)
   776  		}
   777  	}
   778  
   779  	return
   780  }
   781  
   782  func (fs *Goofys) OpenDir(
   783  	ctx context.Context,
   784  	op *fuseops.OpenDirOp) (err error) {
   785  	fs.mu.Lock()
   786  
   787  	handleID := fs.nextHandleID
   788  	fs.nextHandleID++
   789  
   790  	in := fs.getInodeOrDie(op.Inode)
   791  	fs.mu.Unlock()
   792  
    793  	// XXX: is this a dir?
   794  	dh := in.OpenDir()
   795  
   796  	fs.mu.Lock()
   797  	defer fs.mu.Unlock()
   798  
   799  	fs.dirHandles[handleID] = dh
   800  	op.Handle = handleID
   801  
   802  	return
   803  }
   804  
   805  func makeDirEntry(en *DirHandleEntry) fuseutil.Dirent {
   806  	return fuseutil.Dirent{
   807  		Name:   en.Name,
   808  		Type:   en.Type,
   809  		Inode:  en.Inode,
   810  		Offset: en.Offset,
   811  	}
   812  }
   813  
   814  func (fs *Goofys) ReadDir(
   815  	ctx context.Context,
   816  	op *fuseops.ReadDirOp) (err error) {
   817  
   818  	// Find the handle.
   819  	fs.mu.RLock()
   820  	dh := fs.dirHandles[op.Handle]
   821  	fs.mu.RUnlock()
   822  
   823  	if dh == nil {
   824  		panic(fmt.Sprintf("can't find dh=%v", op.Handle))
   825  	}
   826  
   827  	inode := dh.inode
   828  	inode.logFuse("ReadDir", op.Offset)
   829  
   830  	dh.mu.Lock()
   831  	defer dh.mu.Unlock()
   832  
   833  	for i := op.Offset; ; i++ {
   834  		e, err := dh.ReadDir(i)
   835  		if err != nil {
   836  			return err
   837  		}
   838  		if e == nil {
   839  			break
   840  		}
   841  
   842  		if e.Inode == 0 {
   843  			panic(fmt.Sprintf("unset inode %v", e.Name))
   844  		}
   845  
   846  		n := fuseutil.WriteDirent(op.Dst[op.BytesRead:], makeDirEntry(e))
   847  		if n == 0 {
   848  			break
   849  		}
   850  
   851  		dh.inode.logFuse("<-- ReadDir", e.Name, e.Offset)
   852  
   853  		op.BytesRead += n
   854  	}
   855  
   856  	return
   857  }
   858  
   859  func (fs *Goofys) ReleaseDirHandle(
   860  	ctx context.Context,
   861  	op *fuseops.ReleaseDirHandleOp) (err error) {
   862  
   863  	fs.mu.Lock()
   864  	defer fs.mu.Unlock()
   865  
   866  	dh := fs.dirHandles[op.Handle]
   867  	dh.CloseDir()
   868  
   869  	fuseLog.Debugln("ReleaseDirHandle", *dh.inode.FullName())
   870  
   871  	delete(fs.dirHandles, op.Handle)
   872  
   873  	return
   874  }
   875  
   876  func (fs *Goofys) OpenFile(
   877  	ctx context.Context,
   878  	op *fuseops.OpenFileOp) (err error) {
   879  	fs.mu.RLock()
   880  	in := fs.getInodeOrDie(op.Inode)
   881  	fs.mu.RUnlock()
   882  
   883  	fh, err := in.OpenFile(op.Metadata)
   884  	if err != nil {
   885  		return
   886  	}
   887  
   888  	fs.mu.Lock()
   889  
   890  	handleID := fs.nextHandleID
   891  	fs.nextHandleID++
   892  
   893  	fs.fileHandles[handleID] = fh
   894  	fs.mu.Unlock()
   895  
   896  	op.Handle = handleID
   897  
   898  	in.mu.Lock()
   899  	defer in.mu.Unlock()
   900  
   901  	// this flag appears to tell the kernel if this open should
   902  	// use the page cache or not. "use" here means:
   903  	//
    904  	// reads will be served from the cache
    905  	// writes will populate the cache
    906  	//
    907  	// because we have one flag to control both behaviors, if an
    908  	// object is updated out-of-band and we need to invalidate the
    909  	// cache, and we then write to this object locally, subsequent
    910  	// reads will not come from the cache
   911  	//
   912  	// see tests TestReadNewFileWithExternalChangesFuse and
   913  	// TestReadMyOwnWrite*Fuse
   914  	op.KeepPageCache = !in.invalidateCache
   915  	fh.keepPageCache = op.KeepPageCache
   916  	in.invalidateCache = false
   917  
   918  	return
   919  }
   920  
   921  func (fs *Goofys) ReadFile(
   922  	ctx context.Context,
   923  	op *fuseops.ReadFileOp) (err error) {
   924  
   925  	fs.mu.RLock()
   926  	fh := fs.fileHandles[op.Handle]
   927  	fs.mu.RUnlock()
   928  
   929  	op.BytesRead, err = fh.ReadFile(op.Offset, op.Dst)
   930  
   931  	return
   932  }
   933  
   934  func (fs *Goofys) SyncFile(
   935  	ctx context.Context,
   936  	op *fuseops.SyncFileOp) (err error) {
   937  
   938  	// intentionally ignored, so that write()/sync()/write() works
   939  	// see https://github.com/kahing/goofys/issues/154
   940  	return
   941  }
   942  
   943  func (fs *Goofys) FlushFile(
   944  	ctx context.Context,
   945  	op *fuseops.FlushFileOp) (err error) {
   946  
   947  	fs.mu.RLock()
   948  	fh := fs.fileHandles[op.Handle]
   949  	fs.mu.RUnlock()
   950  
   951  	// If the file handle has a tgid, then flush the file only if the
   952  	// incoming request's tgid matches the tgid in the file handle.
   953  	// This check helps us with scenarios like https://github.com/kahing/goofys/issues/273
   954  	// Also see goofys_test.go:TestClientForkExec.
   955  	if fh.Tgid != nil {
   956  		tgid, err := GetTgid(op.Metadata.Pid)
   957  		if err != nil {
   958  			fh.inode.logFuse("<-- FlushFile",
   959  				fmt.Sprintf("Failed to retrieve tgid from op.Metadata.Pid. FlushFileOp:%#v, err:%v",
   960  					op, err))
   961  			return fuse.EIO
   962  		}
   963  		if *fh.Tgid != *tgid {
   964  			fh.inode.logFuse("<-- FlushFile",
   965  				"Operation ignored",
   966  				fmt.Sprintf("fh.Pid:%v != tgid:%v, op:%#v", *fh.Tgid, *tgid, op))
   967  			return nil
   968  		}
   969  	}
   970  
   971  	err = fh.FlushFile()
   972  	if err != nil {
    973  		// if we returned success from creat() earlier,
    974  		// Linux may think this file exists even when it doesn't,
   975  		// until TypeCacheTTL is over
   976  		// TODO: figure out a way to make the kernel forget this inode
   977  		// see TestWriteAnonymousFuse
   978  		fs.mu.RLock()
   979  		inode := fs.getInodeOrDie(op.Inode)
   980  		fs.mu.RUnlock()
   981  
   982  		if inode.KnownSize == nil {
   983  			inode.AttrTime = time.Time{}
   984  		}
   985  
   986  	}
   987  	fh.inode.logFuse("<-- FlushFile", err, op.Handle, op.Inode)
   988  	return
   989  }
   990  
   991  func (fs *Goofys) ReleaseFileHandle(
   992  	ctx context.Context,
   993  	op *fuseops.ReleaseFileHandleOp) (err error) {
   994  	fs.mu.Lock()
   995  	defer fs.mu.Unlock()
   996  	fh := fs.fileHandles[op.Handle]
   997  	fh.Release()
   998  
   999  	fuseLog.Debugln("ReleaseFileHandle", *fh.inode.FullName(), op.Handle, fh.inode.Id)
  1000  
  1001  	delete(fs.fileHandles, op.Handle)
  1002  
  1003  	// try to compact heap
  1004  	//fs.bufferPool.MaybeGC()
  1005  	return
  1006  }
  1007  
  1008  func (fs *Goofys) CreateFile(
  1009  	ctx context.Context,
  1010  	op *fuseops.CreateFileOp) (err error) {
  1011  
  1012  	fs.mu.RLock()
  1013  	parent := fs.getInodeOrDie(op.Parent)
  1014  	fs.mu.RUnlock()
  1015  
  1016  	inode, fh := parent.Create(op.Name, op.Metadata)
  1017  
  1018  	parent.mu.Lock()
  1019  
  1020  	fs.mu.Lock()
  1021  	defer fs.mu.Unlock()
  1022  	fs.insertInode(parent, inode)
  1023  
  1024  	parent.mu.Unlock()
  1025  
  1026  	op.Entry.Child = inode.Id
  1027  	op.Entry.Attributes = inode.InflateAttributes()
  1028  	op.Entry.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
  1029  	op.Entry.EntryExpiration = time.Now().Add(fs.flags.TypeCacheTTL)
  1030  
  1031  	// Allocate a handle.
  1032  	handleID := fs.nextHandleID
  1033  	fs.nextHandleID++
  1034  
  1035  	fs.fileHandles[handleID] = fh
  1036  
  1037  	op.Handle = handleID
  1038  
  1039  	inode.logFuse("<-- CreateFile")
  1040  
  1041  	return
  1042  }
  1043  
  1044  func (fs *Goofys) MkDir(
  1045  	ctx context.Context,
  1046  	op *fuseops.MkDirOp) (err error) {
  1047  
  1048  	fs.mu.RLock()
  1049  	parent := fs.getInodeOrDie(op.Parent)
  1050  	fs.mu.RUnlock()
  1051  
  1052  	// ignore op.Mode for now
  1053  	inode, err := parent.MkDir(op.Name)
  1054  	if err != nil {
  1055  		return err
  1056  	}
  1057  
  1058  	parent.mu.Lock()
  1059  
  1060  	fs.mu.Lock()
  1061  	defer fs.mu.Unlock()
  1062  	fs.insertInode(parent, inode)
  1063  
  1064  	parent.mu.Unlock()
  1065  
  1066  	op.Entry.Child = inode.Id
  1067  	op.Entry.Attributes = inode.InflateAttributes()
  1068  	op.Entry.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
  1069  	op.Entry.EntryExpiration = time.Now().Add(fs.flags.TypeCacheTTL)
  1070  
  1071  	return
  1072  }
  1073  
  1074  func (fs *Goofys) RmDir(
  1075  	ctx context.Context,
  1076  	op *fuseops.RmDirOp) (err error) {
  1077  
  1078  	fs.mu.RLock()
  1079  	parent := fs.getInodeOrDie(op.Parent)
  1080  	fs.mu.RUnlock()
  1081  
  1082  	err = parent.RmDir(op.Name)
  1083  	parent.logFuse("<-- RmDir", op.Name, err)
  1084  	return
  1085  }
  1086  
  1087  func (fs *Goofys) SetInodeAttributes(
  1088  	ctx context.Context,
  1089  	op *fuseops.SetInodeAttributesOp) (err error) {
  1090  
  1091  	fs.mu.RLock()
  1092  	inode := fs.getInodeOrDie(op.Inode)
  1093  	fs.mu.RUnlock()
  1094  
  1095  	attr, err := inode.GetAttributes()
  1096  	if err == nil {
  1097  		op.Attributes = *attr
  1098  		op.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
  1099  	}
  1100  	return
  1101  }
  1102  
  1103  func (fs *Goofys) WriteFile(
  1104  	ctx context.Context,
  1105  	op *fuseops.WriteFileOp) (err error) {
  1106  
  1107  	fs.mu.RLock()
  1108  
  1109  	fh, ok := fs.fileHandles[op.Handle]
  1110  	if !ok {
  1111  		panic(fmt.Sprintf("WriteFile: can't find handle %v", op.Handle))
  1112  	}
  1113  	fs.mu.RUnlock()
  1114  
  1115  	err = fh.WriteFile(op.Offset, op.Data)
  1116  
  1117  	return
  1118  }
  1119  
  1120  func (fs *Goofys) Unlink(
  1121  	ctx context.Context,
  1122  	op *fuseops.UnlinkOp) (err error) {
  1123  
  1124  	fs.mu.RLock()
  1125  	parent := fs.getInodeOrDie(op.Parent)
  1126  	fs.mu.RUnlock()
  1127  
  1128  	err = parent.Unlink(op.Name)
  1129  	return
  1130  }
  1131  
  1132  // rename("from", "to") causes the kernel to send lookup of "from" and
  1133  // "to" prior to sending rename to us
  1134  func (fs *Goofys) Rename(
  1135  	ctx context.Context,
  1136  	op *fuseops.RenameOp) (err error) {
  1137  
  1138  	fs.mu.RLock()
  1139  	parent := fs.getInodeOrDie(op.OldParent)
  1140  	newParent := fs.getInodeOrDie(op.NewParent)
  1141  	fs.mu.RUnlock()
  1142  
  1143  	// XXX don't hold the lock the entire time
  1144  	if op.OldParent == op.NewParent {
  1145  		parent.mu.Lock()
  1146  		defer parent.mu.Unlock()
  1147  	} else {
  1148  		// lock ordering to prevent deadlock
  1149  		if op.OldParent < op.NewParent {
  1150  			parent.mu.Lock()
  1151  			newParent.mu.Lock()
  1152  		} else {
  1153  			newParent.mu.Lock()
  1154  			parent.mu.Lock()
  1155  		}
  1156  		defer parent.mu.Unlock()
  1157  		defer newParent.mu.Unlock()
  1158  	}
  1159  
  1160  	err = parent.Rename(op.OldName, newParent, op.NewName)
  1161  	if err != nil {
  1162  		if err == fuse.ENOENT {
   1163  			// if the source doesn't exist, it could be
   1164  			// because this is a new file and we haven't
   1165  			// flushed it yet; pretend that's ok because
   1166  			// we will handle the rename when we flush
  1167  			inode := parent.findChildUnlocked(op.OldName)
  1168  			if inode != nil && atomic.LoadInt32(&inode.fileHandles) != 0 {
  1169  				err = nil
  1170  			}
  1171  		}
  1172  	}
  1173  	if err == nil {
  1174  		inode := parent.findChildUnlocked(op.OldName)
  1175  		if inode != nil {
  1176  			inode.mu.Lock()
  1177  			defer inode.mu.Unlock()
  1178  
  1179  			parent.removeChildUnlocked(inode)
  1180  
  1181  			newNode := newParent.findChildUnlocked(op.NewName)
  1182  			if newNode != nil {
  1183  				// this file's been overwritten, it's
  1184  				// been detached but we can't delete
  1185  				// it just yet, because the kernel
  1186  				// will still send forget ops to us
  1187  				newParent.removeChildUnlocked(newNode)
  1188  				newNode.Parent = nil
  1189  			}
  1190  
  1191  			inode.Name = &op.NewName
  1192  			inode.Parent = newParent
  1193  			newParent.insertChildUnlocked(inode)
  1194  		}
  1195  	}
  1196  	return
  1197  }