github.com/nefixestrada/goofys@v0.23.1/internal/goofys.go (about)

     1  // Copyright 2015 - 2017 Ka-Hing Cheung
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package internal
    16  
    17  import (
    18  	. "github.com/kahing/goofys/api/common"
    19  
    20  	"context"
    21  	"fmt"
    22  	"math/rand"
    23  	"net/url"
    24  	"runtime/debug"
    25  	"strings"
    26  	"sync"
    27  	"syscall"
    28  	"time"
    29  
    30  	"github.com/aws/aws-sdk-go/aws/awserr"
    31  
    32  	"github.com/jacobsa/fuse"
    33  	"github.com/jacobsa/fuse/fuseops"
    34  	"github.com/jacobsa/fuse/fuseutil"
    35  
    36  	"github.com/sirupsen/logrus"
    37  )
    38  
    39  // goofys is a Filey System written in Go. All the backend data is
    40  // stored on S3 as is. It's a Filey System instead of a File System
    41  // because it makes minimal effort at being POSIX
    42  // compliant. Particularly things that are difficult to support on S3
// or would translate into more than one round-trip would either fail
// (rename non-empty dir) or be faked (no per-file permission). goofys
// does not have an on-disk data cache, and its consistency model is
// close-to-open.
    47  
// Goofys is the FUSE file system object. It embeds
// fuseutil.NotImplementedFileSystem, so only the operations defined on
// *Goofys in this package override the embedded defaults.
type Goofys struct {
	fuseutil.NotImplementedFileSystem
	// Bucket name. NewGoofys stores the part of its bucket argument that
	// precedes the first ':' (or the whole argument if there is none).
	bucket string

	// Flags passed to NewGoofys; read for StatCacheTTL, TypeCacheTTL, etc.
	flags *FlagStorage

	// Initialized to 0122 by NewGoofys.
	umask uint32

	// True when the backend created by NewBackend is a *GCS3.
	gcs bool
	// Attributes of the root; NewGoofys sets Size to 4096 and Mtime to the
	// construction time.
	rootAttrs InodeAttributes

	bufferPool *BufferPool

	// A lock protecting the state of the file system struct itself (distinct
	// from per-inode locks). Make sure to see the notes on lock ordering above.
	// NOTE(review): no lock-ordering notes are visible above this struct in
	// this file; the functions below that take both locks acquire an inode's
	// mu before fs.mu — confirm.
	mu sync.RWMutex

	// The next inode ID to hand out. We assume that this will never overflow,
	// since even if we were handing out inode IDs at 4 GHz, it would still take
	// over a century to do so.
	//
	// GUARDED_BY(mu)
	nextInodeID fuseops.InodeID

	// The collection of live inodes, keyed by inode ID. No ID less than
	// fuseops.RootInodeID is ever used.
	//
	// INVARIANT: For all keys k, fuseops.RootInodeID <= k < nextInodeID
	// INVARIANT: For all keys k, inodes[k].ID() == k
	// INVARIANT: inodes[fuseops.RootInodeID] is missing or of type inode.DirInode
	// INVARIANT: For all v, if IsDirName(v.Name()) then v is inode.DirInode
	//
	// GUARDED_BY(mu)
	inodes map[fuseops.InodeID]*Inode

	// The next handle ID to hand out; starts at 1 and is shared by directory
	// and file handles. Read and incremented while holding mu.
	nextHandleID fuseops.HandleID
	// Open directory handles, keyed by handle ID. Accessed while holding mu.
	dirHandles map[fuseops.HandleID]*DirHandle

	// Open file handles, keyed by handle ID. Accessed while holding mu.
	fileHandles map[fuseops.HandleID]*FileHandle

	// Tickets initialized by NewGoofys with Total 16 and 20 respectively.
	replicators *Ticket
	restorers   *Ticket

	// Number of inodes removed by ForgetInode; reported by SigUsr1.
	forgotCnt uint32
}
    93  
    94  var s3Log = GetLogger("s3")
    95  var log = GetLogger("main")
    96  var fuseLog = GetLogger("fuse")
    97  
    98  func NewBackend(bucket string, flags *FlagStorage) (cloud StorageBackend, err error) {
    99  	if flags.Backend == nil {
   100  		flags.Backend = (&S3Config{}).Init()
   101  	}
   102  
   103  	if config, ok := flags.Backend.(*AZBlobConfig); ok {
   104  		cloud, err = NewAZBlob(bucket, config)
   105  	} else if config, ok := flags.Backend.(*ADLv1Config); ok {
   106  		cloud, err = NewADLv1(bucket, flags, config)
   107  	} else if config, ok := flags.Backend.(*ADLv2Config); ok {
   108  		cloud, err = NewADLv2(bucket, flags, config)
   109  	} else if config, ok := flags.Backend.(*S3Config); ok {
   110  		if strings.HasSuffix(flags.Endpoint, "/storage.googleapis.com") {
   111  			cloud, err = NewGCS3(bucket, flags, config)
   112  		} else {
   113  			cloud, err = NewS3(bucket, flags, config)
   114  		}
   115  	} else {
   116  		err = fmt.Errorf("Unknown backend config: %T", flags.Backend)
   117  	}
   118  
   119  	return
   120  }
   121  
   122  type BucketSpec struct {
   123  	Scheme string
   124  	Bucket string
   125  	Prefix string
   126  }
   127  
   128  func ParseBucketSpec(bucket string) (spec BucketSpec, err error) {
   129  	if strings.Index(bucket, "://") != -1 {
   130  		var u *url.URL
   131  		u, err = url.Parse(bucket)
   132  		if err != nil {
   133  			return
   134  		}
   135  
   136  		spec.Scheme = u.Scheme
   137  		spec.Bucket = u.Host
   138  		if u.User != nil {
   139  			// wasb url can be wasb://container@storage-end-point
   140  			// we want to return the entire thing as bucket
   141  			spec.Bucket = u.User.String() + "@" + u.Host
   142  		}
   143  		spec.Prefix = u.Path
   144  	} else {
   145  		spec.Scheme = "s3"
   146  
   147  		colon := strings.Index(bucket, ":")
   148  		if colon != -1 {
   149  			spec.Prefix = bucket[colon+1:]
   150  			spec.Bucket = bucket[0:colon]
   151  		} else {
   152  			spec.Bucket = bucket
   153  		}
   154  	}
   155  
   156  	spec.Prefix = strings.Trim(spec.Prefix, "/")
   157  	if spec.Prefix != "" {
   158  		spec.Prefix += "/"
   159  	}
   160  	return
   161  }
   162  
// NewGoofys builds a Goofys file system backed by the given bucket.
//
// bucket may be "name" or "name:prefix"; everything after the first ':' is
// used (with surrounding slashes trimmed and one trailing "/" appended when
// non-empty) as the mount prefix of the root directory. The backend is created
// via NewBackend and then initialized with a random object name under that
// prefix.
//
// Returns nil, after logging the error, if the backend cannot be created or
// its Init call fails. ctx is not used in this function.
func NewGoofys(ctx context.Context, bucket string, flags *FlagStorage) *Goofys {
	// Set up the basic struct.
	fs := &Goofys{
		bucket: bucket,
		flags:  flags,
		umask:  0122,
	}

	// Split an optional ":prefix" suffix off the bucket argument.
	var prefix string
	colon := strings.Index(bucket, ":")
	if colon != -1 {
		prefix = bucket[colon+1:]
		prefix = strings.Trim(prefix, "/")
		if prefix != "" {
			prefix += "/"
		}

		fs.bucket = bucket[0:colon]
		bucket = fs.bucket
	}

	if flags.DebugS3 {
		s3Log.Level = logrus.DebugLevel
	}

	cloud, err := NewBackend(bucket, flags)
	if err != nil {
		log.Errorf("Unable to setup backend: %v", err)
		return nil
	}
	// Remember whether the selected backend is the GCS flavour.
	_, fs.gcs = cloud.(*GCS3)

	// Init is handed a random key under the mount prefix.
	randomObjectName := prefix + (RandStringBytesMaskImprSrc(32))
	err = cloud.Init(randomObjectName)
	if err != nil {
		log.Errorf("Unable to access '%v': %v", bucket, err)
		return nil
	}
	// Runs in its own goroutine; its result is not observed here.
	go cloud.MultipartExpire(&MultipartExpireInput{})

	now := time.Now()
	fs.rootAttrs = InodeAttributes{
		Size:  4096,
		Mtime: now,
	}

	fs.bufferPool = BufferPool{}.Init()

	// The root takes fuseops.RootInodeID, so allocation starts one above it.
	fs.nextInodeID = fuseops.RootInodeID + 1
	fs.inodes = make(map[fuseops.InodeID]*Inode)
	root := NewInode(fs, nil, PString(""))
	root.Id = fuseops.RootInodeID
	root.ToDir()
	root.dir.cloud = cloud
	root.dir.mountPrefix = prefix
	root.Attributes.Mtime = fs.rootAttrs.Mtime

	fs.inodes[fuseops.RootInodeID] = root
	fs.addDotAndDotDot(root)

	fs.nextHandleID = 1
	fs.dirHandles = make(map[fuseops.HandleID]*DirHandle)

	fs.fileHandles = make(map[fuseops.HandleID]*FileHandle)

	fs.replicators = Ticket{Total: 16}.Init()
	fs.restorers = Ticket{Total: 20}.Init()

	return fs
}
   233  
   234  // from https://stackoverflow.com/questions/22892120/how-to-generate-a-random-string-of-a-fixed-length-in-golang
   235  func RandStringBytesMaskImprSrc(n int) string {
   236  	const letterBytes = "abcdefghijklmnopqrstuvwxyz0123456789"
   237  	const (
   238  		letterIdxBits = 6                    // 6 bits to represent a letter index
   239  		letterIdxMask = 1<<letterIdxBits - 1 // All 1-bits, as many as letterIdxBits
   240  		letterIdxMax  = 63 / letterIdxBits   // # of letter indices fitting in 63 bits
   241  	)
   242  	src := rand.NewSource(time.Now().UnixNano())
   243  	b := make([]byte, n)
   244  	// A src.Int63() generates 63 random bits, enough for letterIdxMax characters!
   245  	for i, cache, remain := n-1, src.Int63(), letterIdxMax; i >= 0; {
   246  		if remain == 0 {
   247  			cache, remain = src.Int63(), letterIdxMax
   248  		}
   249  		if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
   250  			b[i] = letterBytes[idx]
   251  			i--
   252  		}
   253  		cache >>= letterIdxBits
   254  		remain--
   255  	}
   256  
   257  	return string(b)
   258  }
   259  
   260  func (fs *Goofys) SigUsr1() {
   261  	fs.mu.RLock()
   262  
   263  	log.Infof("forgot %v inodes", fs.forgotCnt)
   264  	log.Infof("%v inodes", len(fs.inodes))
   265  	fs.mu.RUnlock()
   266  	debug.FreeOSMemory()
   267  }
   268  
   269  // Find the given inode. Panic if it doesn't exist.
   270  //
   271  // RLOCKS_REQUIRED(fs.mu)
   272  func (fs *Goofys) getInodeOrDie(id fuseops.InodeID) (inode *Inode) {
   273  	inode = fs.inodes[id]
   274  	if inode == nil {
   275  		panic(fmt.Sprintf("Unknown inode: %v", id))
   276  	}
   277  
   278  	return
   279  }
   280  
// Mount describes a storage backend to be attached at a path below the root
// of the file system.
type Mount struct {
	// Mount Point relative to goofys's root mount.
	name string
	// Backend assigned to the mount point's directory inode.
	cloud StorageBackend
	// Copied into the mount point's dir.mountPrefix.
	prefix string
	// Set to true once (*Goofys).mount has attached this mount; mount returns
	// immediately when it is already true.
	mounted bool
}
   288  
// mount attaches b below mp, creating any missing intermediate directory
// inodes named in b.name. It is a no-op when b.mounted is already true.
//
// If the final path component already exists it must be a directory
// (otherwise this panics); its cloud and mountPrefix are replaced with those
// of b. On return b.mounted is true.
func (fs *Goofys) mount(mp *Inode, b *Mount) {
	if b.mounted {
		return
	}

	name := strings.Trim(b.name, "/")

	// create path for the mount. AttrTime is set to TIME_MAX so
	// they will never expire and be removed. But DirTime is not
	// so we will still consult the underlining cloud for listing
	// (which will then be merged with the cached result)

	// Walk every path component except the last, descending into (or
	// creating) one directory per iteration.
	for {
		idx := strings.Index(name, "/")
		if idx == -1 {
			break
		}
		dirName := name[0:idx]
		name = name[idx+1:]

		mp.mu.Lock()
		dirInode := mp.findChildUnlocked(dirName)
		if dirInode == nil {
			// fs.mu is taken while mp.mu is held.
			fs.mu.Lock()

			dirInode = NewInode(fs, mp, &dirName)
			dirInode.ToDir()
			dirInode.AttrTime = TIME_MAX

			fs.insertInode(mp, dirInode)
			fs.mu.Unlock()
		}
		mp.mu.Unlock()
		mp = dirInode
	}

	// name now holds the last path component; mp is its parent directory.
	mp.mu.Lock()
	defer mp.mu.Unlock()

	prev := mp.findChildUnlocked(name)
	if prev == nil {
		mountInode := NewInode(fs, mp, &name)
		mountInode.ToDir()
		mountInode.dir.cloud = b.cloud
		mountInode.dir.mountPrefix = b.prefix
		mountInode.AttrTime = TIME_MAX

		fs.mu.Lock()
		defer fs.mu.Unlock()

		fs.insertInode(mp, mountInode)
		prev = mountInode
	} else {
		if !prev.isDir() {
			panic(fmt.Sprintf("inode %v is not a directory", *prev.FullName()))
		}

		// This inode might have some cached data from a parent mount.
		// Clear this cache by resetting the DirTime.
		// Note: resetDirTimeRec should be called without holding the lock.
		// NOTE(review): mp.mu (the parent's lock) is still held here;
		// presumably "the lock" above refers to prev.mu — confirm.
		prev.resetDirTimeRec()
		prev.mu.Lock()
		defer prev.mu.Unlock()
		prev.dir.cloud = b.cloud
		prev.dir.mountPrefix = b.prefix
		prev.AttrTime = TIME_MAX

	}
	fuseLog.Infof("mounted /%v", *prev.FullName())
	b.mounted = true
}
   360  
   361  func (fs *Goofys) MountAll(mounts []*Mount) {
   362  	fs.mu.RLock()
   363  	root := fs.getInodeOrDie(fuseops.RootInodeID)
   364  	fs.mu.RUnlock()
   365  
   366  	for _, m := range mounts {
   367  		fs.mount(root, m)
   368  	}
   369  }
   370  
   371  func (fs *Goofys) Mount(mount *Mount) {
   372  	fs.mu.RLock()
   373  	root := fs.getInodeOrDie(fuseops.RootInodeID)
   374  	fs.mu.RUnlock()
   375  	fs.mount(root, mount)
   376  }
   377  
   378  func (fs *Goofys) Unmount(mountPoint string) {
   379  	fs.mu.RLock()
   380  	mp := fs.getInodeOrDie(fuseops.RootInodeID)
   381  	fs.mu.RUnlock()
   382  
   383  	fuseLog.Infof("Attempting to unmount %v", mountPoint)
   384  	path := strings.Split(strings.Trim(mountPoint, "/"), "/")
   385  	for _, localName := range path {
   386  		dirInode := mp.findChild(localName)
   387  		if dirInode == nil || !dirInode.isDir() {
   388  			fuseLog.Errorf("Failed to find directory:%v while unmounting %v. "+
   389  				"Ignoring the unmount operation.", localName, mountPoint)
   390  			return
   391  		}
   392  		mp = dirInode
   393  	}
   394  	mp.ResetForUnmount()
   395  	return
   396  }
   397  
   398  func (fs *Goofys) StatFS(
   399  	ctx context.Context,
   400  	op *fuseops.StatFSOp) (err error) {
   401  
   402  	const BLOCK_SIZE = 4096
   403  	const TOTAL_SPACE = 1 * 1024 * 1024 * 1024 * 1024 * 1024 // 1PB
   404  	const TOTAL_BLOCKS = TOTAL_SPACE / BLOCK_SIZE
   405  	const INODES = 1 * 1000 * 1000 * 1000 // 1 billion
   406  	op.BlockSize = BLOCK_SIZE
   407  	op.Blocks = TOTAL_BLOCKS
   408  	op.BlocksFree = TOTAL_BLOCKS
   409  	op.BlocksAvailable = TOTAL_BLOCKS
   410  	op.IoSize = 1 * 1024 * 1024 // 1MB
   411  	op.Inodes = INODES
   412  	op.InodesFree = INODES
   413  	return
   414  }
   415  
   416  func (fs *Goofys) GetInodeAttributes(
   417  	ctx context.Context,
   418  	op *fuseops.GetInodeAttributesOp) (err error) {
   419  
   420  	fs.mu.RLock()
   421  	inode := fs.getInodeOrDie(op.Inode)
   422  	fs.mu.RUnlock()
   423  
   424  	attr, err := inode.GetAttributes()
   425  	if err == nil {
   426  		op.Attributes = *attr
   427  		op.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
   428  	}
   429  
   430  	return
   431  }
   432  
   433  func (fs *Goofys) GetXattr(ctx context.Context,
   434  	op *fuseops.GetXattrOp) (err error) {
   435  	fs.mu.RLock()
   436  	inode := fs.getInodeOrDie(op.Inode)
   437  	fs.mu.RUnlock()
   438  
   439  	value, err := inode.GetXattr(op.Name)
   440  	if err != nil {
   441  		return
   442  	}
   443  
   444  	op.BytesRead = len(value)
   445  
   446  	if len(op.Dst) < op.BytesRead {
   447  		return syscall.ERANGE
   448  	} else {
   449  		copy(op.Dst, value)
   450  		return
   451  	}
   452  }
   453  
   454  func (fs *Goofys) ListXattr(ctx context.Context,
   455  	op *fuseops.ListXattrOp) (err error) {
   456  	fs.mu.RLock()
   457  	inode := fs.getInodeOrDie(op.Inode)
   458  	fs.mu.RUnlock()
   459  
   460  	xattrs, err := inode.ListXattr()
   461  
   462  	ncopied := 0
   463  
   464  	for _, name := range xattrs {
   465  		buf := op.Dst[ncopied:]
   466  		nlen := len(name) + 1
   467  
   468  		if nlen <= len(buf) {
   469  			copy(buf, name)
   470  			ncopied += nlen
   471  			buf[nlen-1] = '\x00'
   472  		}
   473  
   474  		op.BytesRead += nlen
   475  	}
   476  
   477  	if ncopied < op.BytesRead {
   478  		err = syscall.ERANGE
   479  	}
   480  
   481  	return
   482  }
   483  
   484  func (fs *Goofys) RemoveXattr(ctx context.Context,
   485  	op *fuseops.RemoveXattrOp) (err error) {
   486  	fs.mu.RLock()
   487  	inode := fs.getInodeOrDie(op.Inode)
   488  	fs.mu.RUnlock()
   489  
   490  	err = inode.RemoveXattr(op.Name)
   491  
   492  	return
   493  }
   494  
   495  func (fs *Goofys) SetXattr(ctx context.Context,
   496  	op *fuseops.SetXattrOp) (err error) {
   497  	fs.mu.RLock()
   498  	inode := fs.getInodeOrDie(op.Inode)
   499  	fs.mu.RUnlock()
   500  
   501  	err = inode.SetXattr(op.Name, op.Value, op.Flags)
   502  	return
   503  }
   504  
   505  func mapHttpError(status int) error {
   506  	switch status {
   507  	case 400:
   508  		return fuse.EINVAL
   509  	case 401:
   510  		return syscall.EACCES
   511  	case 403:
   512  		return syscall.EACCES
   513  	case 404:
   514  		return fuse.ENOENT
   515  	case 405:
   516  		return syscall.ENOTSUP
   517  	case 429:
   518  		return syscall.EAGAIN
   519  	case 500:
   520  		return syscall.EAGAIN
   521  	default:
   522  		return nil
   523  	}
   524  }
   525  
   526  func mapAwsError(err error) error {
   527  	if err == nil {
   528  		return nil
   529  	}
   530  
   531  	if awsErr, ok := err.(awserr.Error); ok {
   532  		switch awsErr.Code() {
   533  		case "BucketRegionError":
   534  			// don't need to log anything, we should detect region after
   535  			return err
   536  		case "NoSuchBucket":
   537  			return syscall.ENXIO
   538  		case "BucketAlreadyOwnedByYou":
   539  			return fuse.EEXIST
   540  		}
   541  
   542  		if reqErr, ok := err.(awserr.RequestFailure); ok {
   543  			// A service error occurred
   544  			err = mapHttpError(reqErr.StatusCode())
   545  			if err != nil {
   546  				return err
   547  			} else {
   548  				s3Log.Errorf("http=%v %v s3=%v request=%v\n",
   549  					reqErr.StatusCode(), reqErr.Message(),
   550  					awsErr.Code(), reqErr.RequestID())
   551  				return reqErr
   552  			}
   553  		} else {
   554  			// Generic AWS Error with Code, Message, and original error (if any)
   555  			s3Log.Errorf("code=%v msg=%v, err=%v\n", awsErr.Code(), awsErr.Message(), awsErr.OrigErr())
   556  			return awsErr
   557  		}
   558  	} else {
   559  		return err
   560  	}
   561  }
   562  
   563  // note that this is NOT the same as url.PathEscape in golang 1.8,
   564  // as this preserves / and url.PathEscape converts / to %2F
   565  func pathEscape(path string) string {
   566  	u := url.URL{Path: path}
   567  	return u.EscapedPath()
   568  }
   569  
   570  func (fs *Goofys) allocateInodeId() (id fuseops.InodeID) {
   571  	id = fs.nextInodeID
   572  	fs.nextInodeID++
   573  	return
   574  }
   575  
   576  func expired(cache time.Time, ttl time.Duration) bool {
   577  	now := time.Now()
   578  	if cache.After(now) {
   579  		return false
   580  	}
   581  	return !cache.Add(ttl).After(now)
   582  }
   583  
// LookUpInode resolves op.Name inside the directory op.Parent and fills
// op.Entry with the child's ID, attributes and cache expirations.
//
// A cached child whose attributes have not expired (or that has open file
// handles) is returned directly. Otherwise the parent's LookUp is consulted;
// the result either refreshes the cached inode's attributes or, when no child
// was cached, is inserted as a new inode. A failed lookup returns the error
// and undoes the reference taken on the cached inode.
func (fs *Goofys) LookUpInode(
	ctx context.Context,
	op *fuseops.LookUpInodeOp) (err error) {

	var inode *Inode
	var ok bool
	defer func() { fuseLog.Debugf("<-- LookUpInode %v %v %v", op.Parent, op.Name, err) }()

	fs.mu.RLock()
	parent := fs.getInodeOrDie(op.Parent)
	fs.mu.RUnlock()

	// First try the parent's cached children. ok ends up true only when the
	// cached inode can be returned without going to the backend.
	parent.mu.Lock()
	inode = parent.findChildUnlocked(op.Name)
	if inode != nil {
		ok = true
		// Taken unconditionally here; dropped again below if the backend
		// lookup fails.
		inode.Ref()

		if expired(inode.AttrTime, fs.flags.StatCacheTTL) {
			ok = false
			if inode.fileHandles != 0 {
				// we have an open file handle, object
				// in S3 may not represent the true
				// state of the file anyway, so just
				// return what we know which is
				// potentially more accurate
				ok = true
			} else {
				inode.logFuse("lookup expired")
			}
		}
	} else {
		ok = false
	}
	parent.mu.Unlock()

	if !ok {
		var newInode *Inode

		// Called with parent.mu released.
		newInode, err = parent.LookUp(op.Name)
		if err == fuse.ENOENT && inode != nil && inode.isDir() {
			// we may not be able to look up an implicit
			// dir if all the children are removed, so we
			// just pretend this dir is still around
			err = nil
		} else if err != nil {
			if inode != nil {
				// just kidding! pretend we didn't up the ref
				fs.mu.Lock()
				defer fs.mu.Unlock()

				stale := inode.DeRef(1)
				if stale {
					delete(fs.inodes, inode.Id)
					parent.removeChild(inode)
				}
			}
			return err
		}

		if inode == nil {
			parent.mu.Lock()
			// check again if it's there, could have been
			// added by another lookup or readdir
			inode = parent.findChildUnlocked(op.Name)
			if inode == nil {
				fs.mu.Lock()
				inode = newInode
				fs.insertInode(parent, inode)
				fs.mu.Unlock()
			}
			parent.mu.Unlock()
		} else {
			// Refresh the cached inode from the fresh lookup. newInode is
			// nil on the implicit-directory path above, in which case only
			// AttrTime is updated.
			if newInode != nil {
				// Keep the previously known Mtime when the fresh result
				// carries none.
				if newInode.Attributes.Mtime.IsZero() {
					newInode.Attributes.Mtime = inode.Attributes.Mtime
				}
				inode.Attributes = newInode.Attributes
			}
			inode.AttrTime = time.Now()
		}
	}

	op.Entry.Child = inode.Id
	op.Entry.Attributes = inode.InflateAttributes()
	op.Entry.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
	op.Entry.EntryExpiration = time.Now().Add(fs.flags.TypeCacheTTL)

	return
}
   674  
   675  // LOCKS_REQUIRED(fs.mu)
   676  // LOCKS_REQUIRED(parent.mu)
   677  func (fs *Goofys) insertInode(parent *Inode, inode *Inode) {
   678  	addInode := false
   679  	if *inode.Name == "." {
   680  		inode.Id = parent.Id
   681  	} else if *inode.Name == ".." {
   682  		inode.Id = fuseops.InodeID(fuseops.RootInodeID)
   683  		if parent.Parent != nil {
   684  			inode.Id = parent.Parent.Id
   685  		}
   686  	} else {
   687  		if inode.Id != 0 {
   688  			panic(fmt.Sprintf("inode id is set: %v %v", *inode.Name, inode.Id))
   689  		}
   690  		inode.Id = fs.allocateInodeId()
   691  		addInode = true
   692  	}
   693  	parent.insertChildUnlocked(inode)
   694  	if addInode {
   695  		fs.inodes[inode.Id] = inode
   696  
   697  		// if we are inserting a new directory, also create
   698  		// the child . and ..
   699  		if inode.isDir() {
   700  			fs.addDotAndDotDot(inode)
   701  		}
   702  	}
   703  }
   704  
   705  func (fs *Goofys) addDotAndDotDot(dir *Inode) {
   706  	dot := NewInode(fs, dir, PString("."))
   707  	dot.ToDir()
   708  	dot.AttrTime = TIME_MAX
   709  	fs.insertInode(dir, dot)
   710  
   711  	dot = NewInode(fs, dir, PString(".."))
   712  	dot.ToDir()
   713  	dot.AttrTime = TIME_MAX
   714  	fs.insertInode(dir, dot)
   715  }
   716  
// ForgetInode drops op.N references from op.Inode. When DeRef reports the
// inode stale, it is removed from fs.inodes, forgotCnt is incremented and the
// inode is detached from its parent (if it has one).
func (fs *Goofys) ForgetInode(
	ctx context.Context,
	op *fuseops.ForgetInodeOp) (err error) {

	fs.mu.RLock()
	inode := fs.getInodeOrDie(op.Inode)
	fs.mu.RUnlock()

	// The parent's lock is taken before fs.mu below and held until return,
	// which is why removeChildUnlocked can be used.
	if inode.Parent != nil {
		inode.Parent.mu.Lock()
		defer inode.Parent.mu.Unlock()
	}
	stale := inode.DeRef(op.N)

	if stale {
		fs.mu.Lock()
		defer fs.mu.Unlock()

		delete(fs.inodes, op.Inode)
		fs.forgotCnt += 1

		if inode.Parent != nil {
			inode.Parent.removeChildUnlocked(inode)
		}
	}

	return
}
   745  
   746  func (fs *Goofys) OpenDir(
   747  	ctx context.Context,
   748  	op *fuseops.OpenDirOp) (err error) {
   749  	fs.mu.Lock()
   750  
   751  	handleID := fs.nextHandleID
   752  	fs.nextHandleID++
   753  
   754  	in := fs.getInodeOrDie(op.Inode)
   755  	fs.mu.Unlock()
   756  
   757  	// XXX/is this a dir?
   758  	dh := in.OpenDir()
   759  
   760  	fs.mu.Lock()
   761  	defer fs.mu.Unlock()
   762  
   763  	fs.dirHandles[handleID] = dh
   764  	op.Handle = handleID
   765  
   766  	return
   767  }
   768  
   769  func makeDirEntry(en *DirHandleEntry) fuseutil.Dirent {
   770  	return fuseutil.Dirent{
   771  		Name:   en.Name,
   772  		Type:   en.Type,
   773  		Inode:  en.Inode,
   774  		Offset: en.Offset,
   775  	}
   776  }
   777  
   778  func (fs *Goofys) ReadDir(
   779  	ctx context.Context,
   780  	op *fuseops.ReadDirOp) (err error) {
   781  
   782  	// Find the handle.
   783  	fs.mu.RLock()
   784  	dh := fs.dirHandles[op.Handle]
   785  	fs.mu.RUnlock()
   786  
   787  	if dh == nil {
   788  		panic(fmt.Sprintf("can't find dh=%v", op.Handle))
   789  	}
   790  
   791  	inode := dh.inode
   792  	inode.logFuse("ReadDir", op.Offset)
   793  
   794  	dh.mu.Lock()
   795  	defer dh.mu.Unlock()
   796  
   797  	for i := op.Offset; ; i++ {
   798  		e, err := dh.ReadDir(i)
   799  		if err != nil {
   800  			return err
   801  		}
   802  		if e == nil {
   803  			break
   804  		}
   805  
   806  		if e.Inode == 0 {
   807  			panic(fmt.Sprintf("unset inode %v", e.Name))
   808  		}
   809  
   810  		n := fuseutil.WriteDirent(op.Dst[op.BytesRead:], makeDirEntry(e))
   811  		if n == 0 {
   812  			break
   813  		}
   814  
   815  		dh.inode.logFuse("<-- ReadDir", e.Name, e.Offset)
   816  
   817  		op.BytesRead += n
   818  	}
   819  
   820  	return
   821  }
   822  
   823  func (fs *Goofys) ReleaseDirHandle(
   824  	ctx context.Context,
   825  	op *fuseops.ReleaseDirHandleOp) (err error) {
   826  
   827  	fs.mu.Lock()
   828  	defer fs.mu.Unlock()
   829  
   830  	dh := fs.dirHandles[op.Handle]
   831  	dh.CloseDir()
   832  
   833  	fuseLog.Debugln("ReleaseDirHandle", *dh.inode.FullName())
   834  
   835  	delete(fs.dirHandles, op.Handle)
   836  
   837  	return
   838  }
   839  
   840  func (fs *Goofys) OpenFile(
   841  	ctx context.Context,
   842  	op *fuseops.OpenFileOp) (err error) {
   843  	fs.mu.RLock()
   844  	in := fs.getInodeOrDie(op.Inode)
   845  	fs.mu.RUnlock()
   846  
   847  	fh, err := in.OpenFile(op.Metadata)
   848  	if err != nil {
   849  		return
   850  	}
   851  
   852  	fs.mu.Lock()
   853  	defer fs.mu.Unlock()
   854  
   855  	handleID := fs.nextHandleID
   856  	fs.nextHandleID++
   857  
   858  	fs.fileHandles[handleID] = fh
   859  
   860  	op.Handle = handleID
   861  	op.KeepPageCache = true
   862  
   863  	return
   864  }
   865  
   866  func (fs *Goofys) ReadFile(
   867  	ctx context.Context,
   868  	op *fuseops.ReadFileOp) (err error) {
   869  
   870  	fs.mu.RLock()
   871  	fh := fs.fileHandles[op.Handle]
   872  	fs.mu.RUnlock()
   873  
   874  	op.BytesRead, err = fh.ReadFile(op.Offset, op.Dst)
   875  
   876  	return
   877  }
   878  
   879  func (fs *Goofys) SyncFile(
   880  	ctx context.Context,
   881  	op *fuseops.SyncFileOp) (err error) {
   882  
   883  	// intentionally ignored, so that write()/sync()/write() works
   884  	// see https://github.com/kahing/goofys/issues/154
   885  	return
   886  }
   887  
// FlushFile flushes the file handle op.Handle.
//
// When the handle records a tgid, the flush is performed only if the tgid
// derived from op.Metadata.Pid matches it; a mismatch is logged and reported
// as success, and a failure to resolve the tgid returns fuse.EIO.
//
// If the flush itself fails and the inode has no KnownSize, the inode's
// AttrTime is reset to the zero time. The flush error is returned.
func (fs *Goofys) FlushFile(
	ctx context.Context,
	op *fuseops.FlushFileOp) (err error) {

	fs.mu.RLock()
	fh := fs.fileHandles[op.Handle]
	fs.mu.RUnlock()

	// If the file handle has a tgid, then flush the file only if the
	// incoming request's tgid matches the tgid in the file handle.
	// This check helps us with scenarios like https://github.com/kahing/goofys/issues/273
	// Also see goofys_test.go:TestClientForkExec.
	if fh.Tgid != nil {
		// This err shadows the named result; both exits below return
		// explicit values.
		tgid, err := GetTgid(op.Metadata.Pid)
		if err != nil {
			fh.inode.logFuse("<-- FlushFile",
				fmt.Sprintf("Failed to retrieve tgid from op.Metadata.Pid. FlushFileOp:%#v, err:%v",
					op, err))
			return fuse.EIO
		}
		if *fh.Tgid != *tgid {
			fh.inode.logFuse("<-- FlushFile",
				"Operation ignored",
				fmt.Sprintf("fh.Pid:%v != tgid:%v, op:%#v", *fh.Tgid, *tgid, op))
			return nil
		}
	}

	err = fh.FlushFile()
	if err != nil {
		// if we returned success from creat() earlier
		// linux may think this file exists even when it doesn't,
		// until TypeCacheTTL is over
		// TODO: figure out a way to make the kernel forget this inode
		// see TestWriteAnonymousFuse
		fs.mu.RLock()
		inode := fs.getInodeOrDie(op.Inode)
		fs.mu.RUnlock()

		// Zeroing AttrTime makes expired() report true for this inode on
		// the next lookup.
		if inode.KnownSize == nil {
			inode.AttrTime = time.Time{}
		}

	}
	fh.inode.logFuse("<-- FlushFile", err, op.Handle, op.Inode)
	return
}
   935  
   936  func (fs *Goofys) ReleaseFileHandle(
   937  	ctx context.Context,
   938  	op *fuseops.ReleaseFileHandleOp) (err error) {
   939  	fs.mu.Lock()
   940  	defer fs.mu.Unlock()
   941  	fh := fs.fileHandles[op.Handle]
   942  	fh.Release()
   943  
   944  	fuseLog.Debugln("ReleaseFileHandle", *fh.inode.FullName(), op.Handle, fh.inode.Id)
   945  
   946  	delete(fs.fileHandles, op.Handle)
   947  
   948  	// try to compact heap
   949  	//fs.bufferPool.MaybeGC()
   950  	return
   951  }
   952  
// CreateFile creates op.Name inside the directory op.Parent, inserts the new
// inode into the file system and registers its file handle. op.Entry is
// filled with the new inode's ID, attributes and cache expirations, and
// op.Handle with the allocated handle ID. No error is produced in this
// function.
func (fs *Goofys) CreateFile(
	ctx context.Context,
	op *fuseops.CreateFileOp) (err error) {

	fs.mu.RLock()
	parent := fs.getInodeOrDie(op.Parent)
	fs.mu.RUnlock()

	inode, fh := parent.Create(op.Name, op.Metadata)

	// parent.mu is taken first, then fs.mu. parent.mu is released right after
	// the insert; fs.mu stays held until return because the handle table is
	// updated below.
	parent.mu.Lock()

	fs.mu.Lock()
	defer fs.mu.Unlock()
	fs.insertInode(parent, inode)

	parent.mu.Unlock()

	op.Entry.Child = inode.Id
	op.Entry.Attributes = inode.InflateAttributes()
	op.Entry.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
	op.Entry.EntryExpiration = time.Now().Add(fs.flags.TypeCacheTTL)

	// Allocate a handle.
	handleID := fs.nextHandleID
	fs.nextHandleID++

	fs.fileHandles[handleID] = fh

	op.Handle = handleID

	inode.logFuse("<-- CreateFile")

	return
}
   988  
// MkDir creates the directory op.Name inside op.Parent, inserts the new inode
// into the file system and fills op.Entry with its ID, attributes and cache
// expirations. An error from the parent's MkDir is returned before anything
// is inserted.
func (fs *Goofys) MkDir(
	ctx context.Context,
	op *fuseops.MkDirOp) (err error) {

	fs.mu.RLock()
	parent := fs.getInodeOrDie(op.Parent)
	fs.mu.RUnlock()

	// ignore op.Mode for now
	inode, err := parent.MkDir(op.Name)
	if err != nil {
		return err
	}

	// parent.mu is taken first, then fs.mu. parent.mu is released right after
	// the insert; fs.mu is released by the deferred unlock at return.
	parent.mu.Lock()

	fs.mu.Lock()
	defer fs.mu.Unlock()
	fs.insertInode(parent, inode)

	parent.mu.Unlock()

	op.Entry.Child = inode.Id
	op.Entry.Attributes = inode.InflateAttributes()
	op.Entry.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
	op.Entry.EntryExpiration = time.Now().Add(fs.flags.TypeCacheTTL)

	return
}
  1018  
  1019  func (fs *Goofys) RmDir(
  1020  	ctx context.Context,
  1021  	op *fuseops.RmDirOp) (err error) {
  1022  
  1023  	fs.mu.RLock()
  1024  	parent := fs.getInodeOrDie(op.Parent)
  1025  	fs.mu.RUnlock()
  1026  
  1027  	err = parent.RmDir(op.Name)
  1028  	parent.logFuse("<-- RmDir", op.Name, err)
  1029  	return
  1030  }
  1031  
  1032  func (fs *Goofys) SetInodeAttributes(
  1033  	ctx context.Context,
  1034  	op *fuseops.SetInodeAttributesOp) (err error) {
  1035  
  1036  	fs.mu.RLock()
  1037  	inode := fs.getInodeOrDie(op.Inode)
  1038  	fs.mu.RUnlock()
  1039  
  1040  	attr, err := inode.GetAttributes()
  1041  	if err == nil {
  1042  		op.Attributes = *attr
  1043  		op.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
  1044  	}
  1045  	return
  1046  }
  1047  
  1048  func (fs *Goofys) WriteFile(
  1049  	ctx context.Context,
  1050  	op *fuseops.WriteFileOp) (err error) {
  1051  
  1052  	fs.mu.RLock()
  1053  
  1054  	fh, ok := fs.fileHandles[op.Handle]
  1055  	if !ok {
  1056  		panic(fmt.Sprintf("WriteFile: can't find handle %v", op.Handle))
  1057  	}
  1058  	fs.mu.RUnlock()
  1059  
  1060  	err = fh.WriteFile(op.Offset, op.Data)
  1061  
  1062  	return
  1063  }
  1064  
  1065  func (fs *Goofys) Unlink(
  1066  	ctx context.Context,
  1067  	op *fuseops.UnlinkOp) (err error) {
  1068  
  1069  	fs.mu.RLock()
  1070  	parent := fs.getInodeOrDie(op.Parent)
  1071  	fs.mu.RUnlock()
  1072  
  1073  	err = parent.Unlink(op.Name)
  1074  	return
  1075  }
  1076  
// Rename moves op.OldName in op.OldParent to op.NewName in op.NewParent.
//
// rename("from", "to") causes the kernel to send lookup of "from" and
// "to" prior to sending rename to us
//
// Both parent directories stay locked for the whole operation. After the
// parent's Rename succeeds (or fails with ENOENT for a file that still has
// open handles), the in-memory tree is updated: the source inode is moved
// under the new parent with the new name, and any inode already at the
// destination is detached.
func (fs *Goofys) Rename(
	ctx context.Context,
	op *fuseops.RenameOp) (err error) {

	fs.mu.RLock()
	parent := fs.getInodeOrDie(op.OldParent)
	newParent := fs.getInodeOrDie(op.NewParent)
	fs.mu.RUnlock()

	// XXX don't hold the lock the entire time
	if op.OldParent == op.NewParent {
		parent.mu.Lock()
		defer parent.mu.Unlock()
	} else {
		// lock ordering to prevent deadlock
		// (the parent with the smaller inode ID is locked first)
		if op.OldParent < op.NewParent {
			parent.mu.Lock()
			newParent.mu.Lock()
		} else {
			newParent.mu.Lock()
			parent.mu.Lock()
		}
		defer parent.mu.Unlock()
		defer newParent.mu.Unlock()
	}

	err = parent.Rename(op.OldName, newParent, op.NewName)
	if err != nil {
		if err == fuse.ENOENT {
			// if the source doesn't exist, it could be
			// because this is a new file and we haven't
			// flushed it yet, pretend that's ok because
			// when we flush we will handle the rename
			inode := parent.findChildUnlocked(op.OldName)
			if inode != nil && inode.fileHandles != 0 {
				err = nil
			}
		}
	}
	if err == nil {
		inode := parent.findChildUnlocked(op.OldName)
		if inode != nil {
			// Held until return via the deferred unlock.
			inode.mu.Lock()
			defer inode.mu.Unlock()

			parent.removeChildUnlocked(inode)

			newNode := newParent.findChildUnlocked(op.NewName)
			if newNode != nil {
				// this file's been overwritten, it's
				// been detached but we can't delete
				// it just yet, because the kernel
				// will still send forget ops to us
				newParent.removeChildUnlocked(newNode)
				newNode.Parent = nil
			}

			inode.Name = &op.NewName
			inode.Parent = newParent
			newParent.insertChildUnlocked(inode)
		}
	}
	return
}