github.com/del680202/goofys@v0.19.1-0.20180727070818-6a609fafa266/internal/goofys.go (about)

     1  // Copyright 2015 - 2017 Ka-Hing Cheung
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package internal
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"math/rand"
    21  	"mime"
    22  	"net/http"
    23  	"net/url"
    24  	"runtime/debug"
    25  	"strconv"
    26  	"strings"
    27  	"sync"
    28  	"syscall"
    29  	"time"
    30  
    31  	"github.com/aws/aws-sdk-go/aws"
    32  	"github.com/aws/aws-sdk-go/aws/awserr"
    33  	"github.com/aws/aws-sdk-go/aws/corehandlers"
    34  	"github.com/aws/aws-sdk-go/aws/credentials"
    35  	"github.com/aws/aws-sdk-go/aws/request"
    36  	"github.com/aws/aws-sdk-go/aws/session"
    37  	"github.com/aws/aws-sdk-go/service/s3"
    38  
    39  	"github.com/jacobsa/fuse"
    40  	"github.com/jacobsa/fuse/fuseops"
    41  	"github.com/jacobsa/fuse/fuseutil"
    42  
    43  	"github.com/sirupsen/logrus"
    44  )
    45  
// goofys is a Filey System written in Go. All the backend data is
// stored on S3 as is. It's a Filey System instead of a File System
// because it makes minimal effort at being POSIX
// compliant. In particular, things that are difficult to support on S3
// or would translate into more than one round-trip either fail
// (rename non-empty dir) or are faked (no per-file permission). goofys
// does not have an on-disk data cache, and its consistency model is
// close-to-open.
    54  
// Goofys is the file system object whose methods implement the FUSE
// operations in this file. It embeds fuseutil.NotImplementedFileSystem, so
// any operation without an explicit method here is served by the embedded
// implementation.
type Goofys struct {
	fuseutil.NotImplementedFileSystem
	// bucket is the bucket name; NewGoofys strips any ":prefix" suffix.
	bucket string
	// prefix is prepended to every object name by key(). It is empty unless
	// the bucket argument contained a ":", in which case it ends with "/".
	prefix string

	flags *FlagStorage

	// umask is initialised to 0122 by NewGoofys; it is not read in this file.
	umask uint32

	awsConfig *aws.Config
	sess      *session.Session
	s3        *s3.S3
	// v2Signer is set by fallbackV2Signer and consulted by newS3.
	v2Signer bool
	// gcs is set when flags.Endpoint ends in "/storage.googleapis.com".
	gcs bool
	// sseType is the server-side-encryption header value chosen from
	// flags.UseKMS / flags.UseSSE; empty when neither is set.
	sseType string
	// rootAttrs holds the size (4096) and mtime assigned at construction.
	rootAttrs InodeAttributes

	bufferPool *BufferPool

	// A lock protecting the state of the file system struct itself (distinct
	// from per-inode locks). Where this file holds an inode lock and fs.mu at
	// the same time on purpose (LookUpInode, insertInodeFromDirEntry), the
	// inode lock is taken first.
	mu sync.Mutex

	// The next inode ID to hand out. We assume that this will never overflow,
	// since even if we were handing out inode IDs at 4 GHz, it would still take
	// over a century to do so.
	//
	// GUARDED_BY(mu)
	nextInodeID fuseops.InodeID

	// The collection of live inodes, keyed by inode ID. No ID less than
	// fuseops.RootInodeID is ever used.
	//
	// INVARIANT: For all keys k, fuseops.RootInodeID <= k < nextInodeID
	// INVARIANT: For all keys k, inodes[k].ID() == k
	// INVARIANT: inodes[fuseops.RootInodeID] is missing or of type inode.DirInode
	// INVARIANT: For all v, if IsDirName(v.Name()) then v is inode.DirInode
	//
	// GUARDED_BY(mu)
	inodes map[fuseops.InodeID]*Inode

	// nextHandleID is shared by directory and file handles; it starts at 1
	// and is advanced under mu.
	nextHandleID fuseops.HandleID
	// dirHandles maps open directory handles; accessed under mu in this file.
	dirHandles map[fuseops.HandleID]*DirHandle

	// fileHandles maps open file handles; accessed under mu in this file.
	fileHandles map[fuseops.HandleID]*FileHandle

	// replicators and restorers are Tickets created with totals of 16 and 8
	// respectively; their consumers are not in this file.
	replicators *Ticket
	restorers   *Ticket

	// forgotCnt counts inodes dropped by ForgetInode; reported by SigUsr1.
	forgotCnt uint32
}
   106  
// s3Log is the logger used for S3-related messages in this file. NewGoofys
// raises its level to debug when flags.DebugS3 is set.
var s3Log = GetLogger("s3")
   108  
// NewGoofys builds a Goofys for the given bucket and verifies that the bucket
// is reachable before returning it.
//
// bucket may be "name" or "name:prefix"; in the latter case every object key
// is placed under "prefix/". awsConfig is retained and mutated (log level
// here; credentials and region inside detectBucketLocationByHEAD). ctx is
// accepted but not used by this function body.
//
// It returns nil when the bucket is reported missing or when the access test
// still fails after the optional fallback to V2 signing. On success a
// background goroutine is started to clean up old multipart uploads.
func NewGoofys(ctx context.Context, bucket string, awsConfig *aws.Config, flags *FlagStorage) *Goofys {
	// Set up the basic struct.
	fs := &Goofys{
		bucket: bucket,
		flags:  flags,
		umask:  0122,
	}

	// Split "bucket:prefix" into its two parts; the stored prefix has
	// surrounding slashes trimmed and exactly one trailing "/" appended.
	colon := strings.Index(bucket, ":")
	if colon != -1 {
		fs.prefix = bucket[colon+1:]
		fs.prefix = strings.Trim(fs.prefix, "/")
		fs.prefix += "/"

		fs.bucket = bucket[0:colon]
		bucket = fs.bucket
	}

	if flags.DebugS3 {
		awsConfig.LogLevel = aws.LogLevel(aws.LogDebug | aws.LogDebugWithRequestErrors)
		s3Log.Level = logrus.DebugLevel
	}

	if strings.HasSuffix(flags.Endpoint, "/storage.googleapis.com") {
		fs.gcs = true
	}

	fs.awsConfig = awsConfig
	fs.sess = session.New(awsConfig)
	fs.s3 = fs.newS3()

	var isAws bool
	var err error
	if !fs.flags.RegionSet {
		// No explicit region: probe the bucket with an unsigned HEAD, which
		// may update awsConfig's region and credentials.
		err, isAws = fs.detectBucketLocationByHEAD()
		if err == nil {
			// we detected a region header, this is probably AWS S3,
			// or we can use anonymous access, or both
			// Rebuild the session and client so they pick up the changes.
			fs.sess = session.New(awsConfig)
			fs.s3 = fs.newS3()
		} else if err == fuse.ENOENT {
			log.Errorf("bucket %v does not exist", fs.bucket)
			return nil
		} else {
			// this is NOT AWS, we expect the request to fail with 403 if this is not
			// an anonymous bucket
			if err != syscall.EACCES {
				log.Errorf("Unable to access '%v': %v", fs.bucket, err)
			}
		}
	}

	// try again with the credential to make sure
	err = mapAwsError(fs.testBucket())
	if err != nil {
		if !isAws {
			// EMC returns 403 because it doesn't support v4 signing
			// swift3, ceph-s3 returns 400
			// Amplidata just gives up and return 500
			if err == syscall.EACCES || err == fuse.EINVAL || err == syscall.EAGAIN {
				// The return value of fallbackV2Signer is not checked here.
				fs.fallbackV2Signer()
				err = mapAwsError(fs.testBucket())
			}
		}

		if err != nil {
			log.Errorf("Unable to access '%v': %v", fs.bucket, err)
			return nil
		}
	}

	// Runs concurrently with the rest of construction and is not waited for.
	go fs.cleanUpOldMPU()

	if flags.UseKMS {
		//SSE header string for KMS server-side encryption (SSE-KMS)
		fs.sseType = s3.ServerSideEncryptionAwsKms
	} else if flags.UseSSE {
		//SSE header string for non-KMS server-side encryption (SSE-S3)
		fs.sseType = s3.ServerSideEncryptionAes256
	}

	now := time.Now()
	fs.rootAttrs = InodeAttributes{
		Size:  4096,
		Mtime: now,
	}

	fs.bufferPool = BufferPool{}.Init()

	// The root inode takes fuseops.RootInodeID; allocation for every other
	// inode starts right after it.
	fs.nextInodeID = fuseops.RootInodeID + 1
	fs.inodes = make(map[fuseops.InodeID]*Inode)
	root := NewInode(fs, nil, aws.String(""), aws.String(""))
	root.Id = fuseops.RootInodeID
	root.ToDir()
	root.Attributes.Mtime = fs.rootAttrs.Mtime

	fs.inodes[fuseops.RootInodeID] = root

	fs.nextHandleID = 1
	fs.dirHandles = make(map[fuseops.HandleID]*DirHandle)

	fs.fileHandles = make(map[fuseops.HandleID]*FileHandle)

	fs.replicators = Ticket{Total: 16}.Init()
	fs.restorers = Ticket{Total: 8}.Init()

	return fs
}
   217  
   218  func (fs *Goofys) fallbackV2Signer() (err error) {
   219  	if fs.v2Signer {
   220  		return fuse.EINVAL
   221  	}
   222  
   223  	s3Log.Infoln("Falling back to v2 signer")
   224  	fs.v2Signer = true
   225  	fs.s3 = fs.newS3()
   226  	return
   227  }
   228  
   229  func addAcceptEncoding(req *request.Request) {
   230  	if req.HTTPRequest.Method == "GET" {
   231  		// we need "Accept-Encoding: identity" so that objects
   232  		// with content-encoding won't be automatically
   233  		// deflated, but we don't want to sign it because GCS
   234  		// doesn't like it
   235  		req.HTTPRequest.Header.Set("Accept-Encoding", "identity")
   236  	}
   237  }
   238  
   239  func (fs *Goofys) newS3() *s3.S3 {
   240  	svc := s3.New(fs.sess)
   241  	if fs.v2Signer {
   242  		svc.Handlers.Sign.Clear()
   243  		svc.Handlers.Sign.PushBack(SignV2)
   244  		svc.Handlers.Sign.PushBackNamed(corehandlers.BuildContentLengthHandler)
   245  	}
   246  	svc.Handlers.Sign.PushBack(addAcceptEncoding)
   247  	return svc
   248  }
   249  
   250  // from https://stackoverflow.com/questions/22892120/how-to-generate-a-random-string-of-a-fixed-length-in-golang
   251  func RandStringBytesMaskImprSrc(n int) string {
   252  	const letterBytes = "abcdefghijklmnopqrstuvwxyz0123456789"
   253  	const (
   254  		letterIdxBits = 6                    // 6 bits to represent a letter index
   255  		letterIdxMask = 1<<letterIdxBits - 1 // All 1-bits, as many as letterIdxBits
   256  		letterIdxMax  = 63 / letterIdxBits   // # of letter indices fitting in 63 bits
   257  	)
   258  	src := rand.NewSource(time.Now().UnixNano())
   259  	b := make([]byte, n)
   260  	// A src.Int63() generates 63 random bits, enough for letterIdxMax characters!
   261  	for i, cache, remain := n-1, src.Int63(), letterIdxMax; i >= 0; {
   262  		if remain == 0 {
   263  			cache, remain = src.Int63(), letterIdxMax
   264  		}
   265  		if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
   266  			b[i] = letterBytes[idx]
   267  			i--
   268  		}
   269  		cache >>= letterIdxBits
   270  		remain--
   271  	}
   272  
   273  	return string(b)
   274  }
   275  
   276  func (fs *Goofys) testBucket() (err error) {
   277  	randomObjectName := fs.key(RandStringBytesMaskImprSrc(32))
   278  
   279  	_, err = fs.s3.HeadObject(&s3.HeadObjectInput{Bucket: &fs.bucket, Key: randomObjectName})
   280  	if err != nil {
   281  		err = mapAwsError(err)
   282  		if err == fuse.ENOENT {
   283  			err = nil
   284  		}
   285  	}
   286  
   287  	return
   288  }
   289  
   290  func (fs *Goofys) detectBucketLocationByHEAD() (err error, isAws bool) {
   291  	u := url.URL{
   292  		Scheme: "https",
   293  		Host:   "s3.amazonaws.com",
   294  		Path:   fs.bucket,
   295  	}
   296  
   297  	if fs.awsConfig.Endpoint != nil {
   298  		endpoint, err := url.Parse(*fs.awsConfig.Endpoint)
   299  		if err != nil {
   300  			return err, false
   301  		}
   302  
   303  		u.Scheme = endpoint.Scheme
   304  		u.Host = endpoint.Host
   305  	}
   306  
   307  	var req *http.Request
   308  	var resp *http.Response
   309  
   310  	req, err = http.NewRequest("HEAD", u.String(), nil)
   311  	if err != nil {
   312  		return
   313  	}
   314  
   315  	allowFails := 3
   316  	for i := 0; i < allowFails; i++ {
   317  		resp, err = http.DefaultTransport.RoundTrip(req)
   318  		if err != nil {
   319  			return
   320  		}
   321  		if resp.StatusCode < 500 {
   322  			break
   323  		} else if resp.StatusCode == 503 && resp.Status == "503 Slow Down" {
   324  			time.Sleep(time.Duration(i+1) * time.Second)
   325  			// allow infinite retries for 503 slow down
   326  			allowFails += 1
   327  		}
   328  	}
   329  
   330  	region := resp.Header["X-Amz-Bucket-Region"]
   331  	server := resp.Header["Server"]
   332  
   333  	s3Log.Debugf("HEAD %v = %v %v", u.String(), resp.StatusCode, region)
   334  	if region == nil {
   335  		for k, v := range resp.Header {
   336  			s3Log.Debugf("%v = %v", k, v)
   337  		}
   338  	}
   339  	if server != nil && server[0] == "AmazonS3" {
   340  		isAws = true
   341  	}
   342  
   343  	switch resp.StatusCode {
   344  	case 200:
   345  		// note that this only happen if the bucket is in us-east-1
   346  		if len(fs.flags.Profile) == 0 {
   347  			fs.awsConfig.Credentials = credentials.AnonymousCredentials
   348  			s3Log.Infof("anonymous bucket detected")
   349  		}
   350  	case 400:
   351  		err = fuse.EINVAL
   352  	case 403:
   353  		err = syscall.EACCES
   354  	case 404:
   355  		err = fuse.ENOENT
   356  	case 405:
   357  		err = syscall.ENOTSUP
   358  	default:
   359  		err = awserr.New(strconv.Itoa(resp.StatusCode), resp.Status, nil)
   360  	}
   361  
   362  	if len(region) != 0 {
   363  		if region[0] != *fs.awsConfig.Region {
   364  			s3Log.Infof("Switching from region '%v' to '%v'",
   365  				*fs.awsConfig.Region, region[0])
   366  			fs.awsConfig.Region = &region[0]
   367  		}
   368  
   369  		// we detected a region, this is aws, the error is irrelevant
   370  		err = nil
   371  	}
   372  
   373  	return
   374  }
   375  
   376  func (fs *Goofys) cleanUpOldMPU() {
   377  	mpu, err := fs.s3.ListMultipartUploads(&s3.ListMultipartUploadsInput{Bucket: &fs.bucket})
   378  	if err != nil {
   379  		mapAwsError(err)
   380  		return
   381  	}
   382  	s3Log.Debug(mpu)
   383  
   384  	now := time.Now()
   385  	for _, upload := range mpu.Uploads {
   386  		expireTime := upload.Initiated.Add(48 * time.Hour)
   387  
   388  		if !expireTime.After(now) {
   389  			params := &s3.AbortMultipartUploadInput{
   390  				Bucket:   &fs.bucket,
   391  				Key:      upload.Key,
   392  				UploadId: upload.UploadId,
   393  			}
   394  			resp, err := fs.s3.AbortMultipartUpload(params)
   395  			s3Log.Debug(resp)
   396  
   397  			if mapAwsError(err) == syscall.EACCES {
   398  				break
   399  			}
   400  		} else {
   401  			s3Log.Debugf("Keeping MPU Key=%v Id=%v", *upload.Key, *upload.UploadId)
   402  		}
   403  	}
   404  }
   405  
   406  func (fs *Goofys) SigUsr1() {
   407  	fs.mu.Lock()
   408  
   409  	log.Infof("forgot %v inodes", fs.forgotCnt)
   410  	log.Infof("%v inodes", len(fs.inodes))
   411  	fs.mu.Unlock()
   412  	debug.FreeOSMemory()
   413  }
   414  
   415  // Find the given inode. Panic if it doesn't exist.
   416  //
   417  // LOCKS_REQUIRED(fs.mu)
   418  func (fs *Goofys) getInodeOrDie(id fuseops.InodeID) (inode *Inode) {
   419  	inode = fs.inodes[id]
   420  	if inode == nil {
   421  		panic(fmt.Sprintf("Unknown inode: %v", id))
   422  	}
   423  
   424  	return
   425  }
   426  
   427  func (fs *Goofys) StatFS(
   428  	ctx context.Context,
   429  	op *fuseops.StatFSOp) (err error) {
   430  
   431  	const BLOCK_SIZE = 4096
   432  	const TOTAL_SPACE = 1 * 1024 * 1024 * 1024 * 1024 * 1024 // 1PB
   433  	const TOTAL_BLOCKS = TOTAL_SPACE / BLOCK_SIZE
   434  	const INODES = 1 * 1000 * 1000 * 1000 // 1 billion
   435  	op.BlockSize = BLOCK_SIZE
   436  	op.Blocks = TOTAL_BLOCKS
   437  	op.BlocksFree = TOTAL_BLOCKS
   438  	op.BlocksAvailable = TOTAL_BLOCKS
   439  	op.IoSize = 1 * 1024 * 1024 // 1MB
   440  	op.Inodes = INODES
   441  	op.InodesFree = INODES
   442  	return
   443  }
   444  
   445  func (fs *Goofys) GetInodeAttributes(
   446  	ctx context.Context,
   447  	op *fuseops.GetInodeAttributesOp) (err error) {
   448  
   449  	fs.mu.Lock()
   450  	inode := fs.getInodeOrDie(op.Inode)
   451  	fs.mu.Unlock()
   452  
   453  	attr, err := inode.GetAttributes()
   454  	if err == nil {
   455  		op.Attributes = *attr
   456  		op.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
   457  	}
   458  
   459  	return
   460  }
   461  
   462  func (fs *Goofys) GetXattr(ctx context.Context,
   463  	op *fuseops.GetXattrOp) (err error) {
   464  	fs.mu.Lock()
   465  	inode := fs.getInodeOrDie(op.Inode)
   466  	fs.mu.Unlock()
   467  
   468  	value, err := inode.GetXattr(op.Name)
   469  	if err != nil {
   470  		return
   471  	}
   472  
   473  	op.BytesRead = len(value)
   474  
   475  	if len(op.Dst) < op.BytesRead {
   476  		return syscall.ERANGE
   477  	} else {
   478  		copy(op.Dst, value)
   479  		return
   480  	}
   481  }
   482  
   483  func (fs *Goofys) ListXattr(ctx context.Context,
   484  	op *fuseops.ListXattrOp) (err error) {
   485  	fs.mu.Lock()
   486  	inode := fs.getInodeOrDie(op.Inode)
   487  	fs.mu.Unlock()
   488  
   489  	xattrs, err := inode.ListXattr()
   490  
   491  	ncopied := 0
   492  
   493  	for _, name := range xattrs {
   494  		buf := op.Dst[ncopied:]
   495  		nlen := len(name) + 1
   496  
   497  		if nlen <= len(buf) {
   498  			copy(buf, name)
   499  			ncopied += nlen
   500  			buf[nlen-1] = '\x00'
   501  		}
   502  
   503  		op.BytesRead += nlen
   504  	}
   505  
   506  	if ncopied < op.BytesRead {
   507  		err = syscall.ERANGE
   508  	}
   509  
   510  	return
   511  }
   512  
   513  func (fs *Goofys) RemoveXattr(ctx context.Context,
   514  	op *fuseops.RemoveXattrOp) (err error) {
   515  	fs.mu.Lock()
   516  	inode := fs.getInodeOrDie(op.Inode)
   517  	fs.mu.Unlock()
   518  
   519  	inode.RemoveXattr(op.Name)
   520  
   521  	return
   522  }
   523  
   524  func (fs *Goofys) SetXattr(ctx context.Context,
   525  	op *fuseops.SetXattrOp) (err error) {
   526  	fs.mu.Lock()
   527  	inode := fs.getInodeOrDie(op.Inode)
   528  	fs.mu.Unlock()
   529  
   530  	inode.SetXattr(op.Name, op.Value, op.Flags)
   531  	return
   532  }
   533  
   534  func mapAwsError(err error) error {
   535  	if err == nil {
   536  		return nil
   537  	}
   538  
   539  	if awsErr, ok := err.(awserr.Error); ok {
   540  		if reqErr, ok := err.(awserr.RequestFailure); ok {
   541  			// A service error occurred
   542  			switch reqErr.StatusCode() {
   543  			case 400:
   544  				return fuse.EINVAL
   545  			case 403:
   546  				return syscall.EACCES
   547  			case 404:
   548  				return fuse.ENOENT
   549  			case 405:
   550  				return syscall.ENOTSUP
   551  			case 500:
   552  				return syscall.EAGAIN
   553  			default:
   554  				s3Log.Errorf("code=%v msg=%v request=%v\n", reqErr.Message(), reqErr.StatusCode(), reqErr.RequestID())
   555  				return reqErr
   556  			}
   557  		} else {
   558  			switch awsErr.Code() {
   559  			case "BucketRegionError":
   560  				// don't need to log anything, we should detect region after
   561  				return err
   562  			default:
   563  				// Generic AWS Error with Code, Message, and original error (if any)
   564  				s3Log.Errorf("code=%v msg=%v, err=%v\n", awsErr.Code(), awsErr.Message(), awsErr.OrigErr())
   565  				return awsErr
   566  			}
   567  		}
   568  	} else {
   569  		return err
   570  	}
   571  }
   572  
   573  func (fs *Goofys) key(name string) *string {
   574  	name = fs.prefix + name
   575  	return &name
   576  }
   577  
   578  // note that this is NOT the same as url.PathEscape in golang 1.8,
   579  // as this preserves / and url.PathEscape converts / to %2F
   580  func pathEscape(path string) string {
   581  	u := url.URL{Path: path}
   582  	return u.EscapedPath()
   583  }
   584  
   585  func (fs *Goofys) allocateInodeId() (id fuseops.InodeID) {
   586  	id = fs.nextInodeID
   587  	fs.nextInodeID++
   588  	return
   589  }
   590  
   591  func expired(cache time.Time, ttl time.Duration) bool {
   592  	return !cache.Add(ttl).After(time.Now())
   593  }
   594  
// LookUpInode resolves op.Name inside the directory op.Parent and fills
// op.Entry with the child's inode ID, attributes and cache expirations.
//
// A cached child is used directly when its attributes have not expired under
// flags.StatCacheTTL, or when it has open file handles. Otherwise
// parent.LookUp is called and its result is either inserted as a new inode or
// used to refresh the cached inode's attributes.
//
// It panics if op.Parent is unknown (see getInodeOrDie) and returns the error
// from parent.LookUp when that lookup fails.
func (fs *Goofys) LookUpInode(
	ctx context.Context,
	op *fuseops.LookUpInodeOp) (err error) {

	var inode *Inode
	var ok bool
	defer func() { fuseLog.Debugf("<-- LookUpInode %v %v %v", op.Parent, op.Name, err) }()

	fs.mu.Lock()
	parent := fs.getInodeOrDie(op.Parent)
	fs.mu.Unlock()

	// Consult the parent's cached children; parent.mu is taken before fs.mu.
	parent.mu.Lock()
	fs.mu.Lock()
	inode = parent.findChildUnlockedFull(op.Name)
	if inode != nil {
		ok = true
		// Take the reference up front; it is dropped again below if the
		// refresh fails.
		inode.Ref()

		if expired(inode.AttrTime, fs.flags.StatCacheTTL) {
			ok = false
			if inode.fileHandles != 0 {
				// we have an open file handle, object
				// in S3 may not represent the true
				// state of the file anyway, so just
				// return what we know which is
				// potentially more accurate
				ok = true
			} else {
				inode.logFuse("lookup expired")
			}
		}
	} else {
		ok = false
	}
	fs.mu.Unlock()
	parent.mu.Unlock()

	if !ok {
		var newInode *Inode

		// Performed without holding fs.mu or parent.mu.
		newInode, err = parent.LookUp(op.Name)
		if err != nil {
			if inode != nil {
				// just kidding! pretend we didn't up the ref
				fs.mu.Lock()
				defer fs.mu.Unlock()

				stale := inode.DeRef(1)
				if stale {
					delete(fs.inodes, inode.Id)
					// NOTE(review): this runs while fs.mu is held; if
					// removeChild acquires parent.mu, the order is the
					// reverse of the one used above — confirm.
					parent.removeChild(inode)
				}
			}
			return err
		}

		if inode == nil {
			parent.mu.Lock()
			// check again if it's there, could have been
			// added by another lookup or readdir
			inode = parent.findChildUnlockedFull(op.Name)
			if inode == nil {
				fs.mu.Lock()
				inode = newInode
				fs.insertInode(parent, inode)
				fs.mu.Unlock()
			}
			parent.mu.Unlock()
		} else {
			// Refresh the cached inode, keeping its previous mtime when the
			// fresh lookup did not provide one.
			if newInode.Attributes.Mtime.IsZero() {
				newInode.Attributes.Mtime = inode.Attributes.Mtime
			}
			inode.Attributes = newInode.Attributes
			inode.AttrTime = time.Now()
		}
	}

	op.Entry.Child = inode.Id
	op.Entry.Attributes = inode.InflateAttributes()
	op.Entry.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
	op.Entry.EntryExpiration = time.Now().Add(fs.flags.TypeCacheTTL)

	return
}
   680  
   681  // LOCKS_REQUIRED(fs.mu)
   682  // LOCKS_REQUIRED(parent.mu)
   683  func (fs *Goofys) insertInode(parent *Inode, inode *Inode) {
   684  	inode.Id = fs.allocateInodeId()
   685  	parent.insertChildUnlocked(inode)
   686  	fs.inodes[inode.Id] = inode
   687  }
   688  
   689  // LOCKS_EXCLUDED(fs.mu)
   690  func (fs *Goofys) ForgetInode(
   691  	ctx context.Context,
   692  	op *fuseops.ForgetInodeOp) (err error) {
   693  
   694  	fs.mu.Lock()
   695  
   696  	inode := fs.getInodeOrDie(op.Inode)
   697  	stale := inode.DeRef(op.N)
   698  
   699  	if stale {
   700  		delete(fs.inodes, op.Inode)
   701  		fs.forgotCnt += 1
   702  		fs.mu.Unlock()
   703  
   704  		if inode.Parent != nil {
   705  			inode.Parent.removeChild(inode)
   706  		}
   707  	} else {
   708  		fs.mu.Unlock()
   709  	}
   710  
   711  	return
   712  }
   713  
   714  func (fs *Goofys) OpenDir(
   715  	ctx context.Context,
   716  	op *fuseops.OpenDirOp) (err error) {
   717  	fs.mu.Lock()
   718  
   719  	handleID := fs.nextHandleID
   720  	fs.nextHandleID++
   721  
   722  	in := fs.getInodeOrDie(op.Inode)
   723  	fs.mu.Unlock()
   724  
   725  	// XXX/is this a dir?
   726  	dh := in.OpenDir()
   727  
   728  	fs.mu.Lock()
   729  	defer fs.mu.Unlock()
   730  
   731  	fs.dirHandles[handleID] = dh
   732  	op.Handle = handleID
   733  
   734  	return
   735  }
   736  
   737  // LOCKS_EXCLUDED(fs.mu)
   738  func (fs *Goofys) insertInodeFromDirEntry(parent *Inode, entry *DirHandleEntry) (inode *Inode) {
   739  	parent.mu.Lock()
   740  	defer parent.mu.Unlock()
   741  
   742  	inode = parent.findChildUnlocked(*entry.Name, entry.Type == fuseutil.DT_Directory)
   743  	if inode == nil {
   744  		path := parent.getChildName(*entry.Name)
   745  		inode = NewInode(fs, parent, entry.Name, &path)
   746  		if entry.Type == fuseutil.DT_Directory {
   747  			inode.ToDir()
   748  		} else {
   749  			inode.Attributes = *entry.Attributes
   750  		}
   751  		if entry.ETag != nil {
   752  			inode.s3Metadata["etag"] = []byte(*entry.ETag)
   753  		}
   754  		if entry.StorageClass != nil {
   755  			inode.s3Metadata["storage-class"] = []byte(*entry.StorageClass)
   756  		}
   757  		// these are fake dir entries, we will realize the refcnt when
   758  		// lookup is done
   759  		inode.refcnt = 0
   760  
   761  		fs.mu.Lock()
   762  		defer fs.mu.Unlock()
   763  
   764  		fs.insertInode(parent, inode)
   765  	} else {
   766  		inode.mu.Lock()
   767  		defer inode.mu.Unlock()
   768  
   769  		if entry.ETag != nil {
   770  			inode.s3Metadata["etag"] = []byte(*entry.ETag)
   771  		}
   772  		if entry.StorageClass != nil {
   773  			inode.s3Metadata["storage-class"] = []byte(*entry.StorageClass)
   774  		}
   775  		inode.KnownSize = &entry.Attributes.Size
   776  		inode.Attributes.Mtime = entry.Attributes.Mtime
   777  		inode.AttrTime = time.Now()
   778  	}
   779  	return
   780  }
   781  
   782  func makeDirEntry(en *DirHandleEntry) fuseutil.Dirent {
   783  	return fuseutil.Dirent{
   784  		Name:   *en.Name,
   785  		Type:   en.Type,
   786  		Inode:  fuseops.RootInodeID + 1,
   787  		Offset: en.Offset,
   788  	}
   789  }
   790  
// ReadDir fills op.Dst with directory entries of the handle op.Handle,
// starting at op.Offset, and records the number of bytes written in
// op.BytesRead. It stops when the handle has no more entries or when the next
// entry does not fit in op.Dst.
//
// It panics when op.Handle is not a registered directory handle and returns
// the first error produced by the handle's ReadDir.
//
// LOCKS_EXCLUDED(fs.mu)
func (fs *Goofys) ReadDir(
	ctx context.Context,
	op *fuseops.ReadDirOp) (err error) {

	// Find the handle.
	fs.mu.Lock()
	dh := fs.dirHandles[op.Handle]
	fs.mu.Unlock()

	if dh == nil {
		panic(fmt.Sprintf("can't find dh=%v", op.Handle))
	}

	inode := dh.inode
	inode.logFuse("ReadDir", op.Offset)

	// The handle stays locked for the whole read.
	dh.mu.Lock()
	defer dh.mu.Unlock()

	readFromS3 := false

	for i := op.Offset; ; i++ {
		// This err shadows the named result, hence the explicit return.
		e, err := dh.ReadDir(i)
		if err != nil {
			return err
		}
		if e == nil {
			// we've reached the end, if this was read
			// from S3 then update the cache time
			if readFromS3 {
				inode.dir.DirTime = time.Now()
				inode.Attributes.Mtime = inode.findChildMaxTime()
			}
			break
		}

		// Entries with a zero Inode are treated as having been read from S3
		// and get an inode created or refreshed for them.
		if e.Inode == 0 {
			readFromS3 = true
			fs.insertInodeFromDirEntry(inode, e)
		}

		// WriteDirent returning 0 means the entry was not written (the
		// destination is full), so stop here.
		n := fuseutil.WriteDirent(op.Dst[op.BytesRead:], makeDirEntry(e))
		if n == 0 {
			break
		}

		dh.inode.logFuse("<-- ReadDir", *e.Name, e.Offset)

		op.BytesRead += n
	}

	return
}
   845  
   846  func (fs *Goofys) ReleaseDirHandle(
   847  	ctx context.Context,
   848  	op *fuseops.ReleaseDirHandleOp) (err error) {
   849  
   850  	fs.mu.Lock()
   851  	defer fs.mu.Unlock()
   852  
   853  	dh := fs.dirHandles[op.Handle]
   854  	dh.CloseDir()
   855  
   856  	fuseLog.Debugln("ReleaseDirHandle", *dh.inode.FullName())
   857  
   858  	delete(fs.dirHandles, op.Handle)
   859  
   860  	return
   861  }
   862  
   863  func (fs *Goofys) OpenFile(
   864  	ctx context.Context,
   865  	op *fuseops.OpenFileOp) (err error) {
   866  	fs.mu.Lock()
   867  	in := fs.getInodeOrDie(op.Inode)
   868  	fs.mu.Unlock()
   869  
   870  	fh, err := in.OpenFile()
   871  	if err != nil {
   872  		return
   873  	}
   874  
   875  	fs.mu.Lock()
   876  	defer fs.mu.Unlock()
   877  
   878  	handleID := fs.nextHandleID
   879  	fs.nextHandleID++
   880  
   881  	fs.fileHandles[handleID] = fh
   882  
   883  	op.Handle = handleID
   884  	op.KeepPageCache = true
   885  
   886  	return
   887  }
   888  
   889  func (fs *Goofys) ReadFile(
   890  	ctx context.Context,
   891  	op *fuseops.ReadFileOp) (err error) {
   892  
   893  	fs.mu.Lock()
   894  	fh := fs.fileHandles[op.Handle]
   895  	fs.mu.Unlock()
   896  
   897  	op.BytesRead, err = fh.ReadFile(op.Offset, op.Dst)
   898  
   899  	return
   900  }
   901  
   902  func (fs *Goofys) SyncFile(
   903  	ctx context.Context,
   904  	op *fuseops.SyncFileOp) (err error) {
   905  
   906  	// intentionally ignored, so that write()/sync()/write() works
   907  	// see https://github.com/kahing/goofys/issues/154
   908  	return
   909  }
   910  
   911  func (fs *Goofys) FlushFile(
   912  	ctx context.Context,
   913  	op *fuseops.FlushFileOp) (err error) {
   914  
   915  	fs.mu.Lock()
   916  	fh := fs.fileHandles[op.Handle]
   917  	fs.mu.Unlock()
   918  
   919  	err = fh.FlushFile()
   920  	if err != nil {
   921  		// if we returned success from creat() earlier
   922  		// linux may think this file exists even when it doesn't,
   923  		// until TypeCacheTTL is over
   924  		// TODO: figure out a way to make the kernel forget this inode
   925  		// see TestWriteAnonymousFuse
   926  		fs.mu.Lock()
   927  		inode := fs.getInodeOrDie(op.Inode)
   928  		fs.mu.Unlock()
   929  
   930  		if inode.KnownSize == nil {
   931  			inode.AttrTime = time.Time{}
   932  		}
   933  
   934  	}
   935  	fh.inode.logFuse("<-- FlushFile", err)
   936  
   937  	return
   938  }
   939  
   940  func (fs *Goofys) ReleaseFileHandle(
   941  	ctx context.Context,
   942  	op *fuseops.ReleaseFileHandleOp) (err error) {
   943  	fs.mu.Lock()
   944  	defer fs.mu.Unlock()
   945  
   946  	fh := fs.fileHandles[op.Handle]
   947  	fh.Release()
   948  
   949  	fuseLog.Debugln("ReleaseFileHandle", *fh.inode.FullName())
   950  
   951  	delete(fs.fileHandles, op.Handle)
   952  
   953  	// try to compact heap
   954  	//fs.bufferPool.MaybeGC()
   955  	return
   956  }
   957  
   958  func (fs *Goofys) CreateFile(
   959  	ctx context.Context,
   960  	op *fuseops.CreateFileOp) (err error) {
   961  
   962  	fs.mu.Lock()
   963  	parent := fs.getInodeOrDie(op.Parent)
   964  	fs.mu.Unlock()
   965  
   966  	inode, fh := parent.Create(op.Name)
   967  
   968  	fs.mu.Lock()
   969  	defer fs.mu.Unlock()
   970  
   971  	fs.insertInode(parent, inode)
   972  
   973  	op.Entry.Child = inode.Id
   974  	op.Entry.Attributes = inode.InflateAttributes()
   975  	op.Entry.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
   976  	op.Entry.EntryExpiration = time.Now().Add(fs.flags.TypeCacheTTL)
   977  
   978  	// Allocate a handle.
   979  	handleID := fs.nextHandleID
   980  	fs.nextHandleID++
   981  
   982  	fs.fileHandles[handleID] = fh
   983  
   984  	op.Handle = handleID
   985  
   986  	inode.logFuse("<-- CreateFile")
   987  
   988  	return
   989  }
   990  
   991  func (fs *Goofys) MkDir(
   992  	ctx context.Context,
   993  	op *fuseops.MkDirOp) (err error) {
   994  
   995  	fs.mu.Lock()
   996  	parent := fs.getInodeOrDie(op.Parent)
   997  	fs.mu.Unlock()
   998  
   999  	// ignore op.Mode for now
  1000  	inode, err := parent.MkDir(op.Name)
  1001  	if err != nil {
  1002  		return err
  1003  	}
  1004  
  1005  	fs.mu.Lock()
  1006  	defer fs.mu.Unlock()
  1007  
  1008  	fs.insertInode(parent, inode)
  1009  
  1010  	op.Entry.Child = inode.Id
  1011  	op.Entry.Attributes = inode.InflateAttributes()
  1012  	op.Entry.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
  1013  	op.Entry.EntryExpiration = time.Now().Add(fs.flags.TypeCacheTTL)
  1014  
  1015  	return
  1016  }
  1017  
  1018  func (fs *Goofys) RmDir(
  1019  	ctx context.Context,
  1020  	op *fuseops.RmDirOp) (err error) {
  1021  
  1022  	fs.mu.Lock()
  1023  	parent := fs.getInodeOrDie(op.Parent)
  1024  	fs.mu.Unlock()
  1025          // Remove s3 dir
  1026          fullName := parent.getChildName(op.Name) + "/"
  1027          fs.s3.DeleteObject(&s3.DeleteObjectInput{Bucket: &fs.bucket, Key: fs.key(fullName + "/")})
  1028          inode := parent.findChildUnlocked(op.Name, true)
  1029          if inode != nil {
  1030                  parent.removeChildUnlocked(inode)
  1031                  inode.Parent = nil
  1032          }
  1033  	err = parent.RmDir(op.Name)
  1034  	parent.logFuse("<-- RmDir", op.Name, err)
  1035  	return
  1036  }
  1037  
  1038  func (fs *Goofys) SetInodeAttributes(
  1039  	ctx context.Context,
  1040  	op *fuseops.SetInodeAttributesOp) (err error) {
  1041  
  1042  	fs.mu.Lock()
  1043  	inode := fs.getInodeOrDie(op.Inode)
  1044  	fs.mu.Unlock()
  1045  
  1046  	attr, err := inode.GetAttributes()
  1047  	if err == nil {
  1048  		op.Attributes = *attr
  1049  		op.AttributesExpiration = time.Now().Add(fs.flags.StatCacheTTL)
  1050  	}
  1051  	return
  1052  }
  1053  
  1054  func (fs *Goofys) WriteFile(
  1055  	ctx context.Context,
  1056  	op *fuseops.WriteFileOp) (err error) {
  1057  
  1058  	fs.mu.Lock()
  1059  
  1060  	fh, ok := fs.fileHandles[op.Handle]
  1061  	if !ok {
  1062  		panic(fmt.Sprintf("WriteFile: can't find handle %v", op.Handle))
  1063  	}
  1064  	fs.mu.Unlock()
  1065  
  1066  	err = fh.WriteFile(op.Offset, op.Data)
  1067  
  1068  	return
  1069  }
  1070  
  1071  func (fs *Goofys) Unlink(
  1072  	ctx context.Context,
  1073  	op *fuseops.UnlinkOp) (err error) {
  1074  
  1075  	fs.mu.Lock()
  1076  	parent := fs.getInodeOrDie(op.Parent)
  1077  	fs.mu.Unlock()
  1078  
  1079  	err = parent.Unlink(op.Name)
  1080  	return
  1081  }
  1082  
// Rename moves op.OldName in op.OldParent to op.NewName in op.NewParent.
//
// The parent inode locks are held for the entire operation, including the
// call to parent.Rename. When the two parents differ they are locked in
// ascending inode-ID order. After a successful rename the cached inode is
// moved to the new parent under its new name, and any cached inode it
// replaces is detached from the new parent.
//
// It panics if either parent is unknown (see getInodeOrDie) and returns the
// error from parent.Rename, except for the ENOENT case described below.
func (fs *Goofys) Rename(
	ctx context.Context,
	op *fuseops.RenameOp) (err error) {

	fs.mu.Lock()
	parent := fs.getInodeOrDie(op.OldParent)
	newParent := fs.getInodeOrDie(op.NewParent)
	fs.mu.Unlock()

	// XXX don't hold the lock the entire time
	if op.OldParent == op.NewParent {
		// Same directory: lock it only once.
		parent.mu.Lock()
		defer parent.mu.Unlock()
	} else {
		// lock ordering to prevent deadlock
		if op.OldParent < op.NewParent {
			parent.mu.Lock()
			newParent.mu.Lock()
		} else {
			newParent.mu.Lock()
			parent.mu.Lock()
		}
		defer parent.mu.Unlock()
		defer newParent.mu.Unlock()
	}

	err = parent.Rename(op.OldName, newParent, op.NewName)
	if err != nil {
		if err == fuse.ENOENT {
			// if the source doesn't exist, it could be
			// because this is a new file and we haven't
			// flushed it yet, pretend that's ok because
			// when we flush we will handle the rename
			inode := parent.findChildUnlocked(op.OldName, false)
			if inode != nil && inode.fileHandles != 0 {
				err = nil
			}
		}
	}
	if err == nil {
		// Bring the cached tree in line with the rename.
		inode := parent.findChildUnlockedFull(op.OldName)
		if inode != nil {
			// Held until return; this deferred unlock runs before the
			// parent unlocks deferred above.
			inode.mu.Lock()
			defer inode.mu.Unlock()

			parent.removeChildUnlocked(inode)

			newNode := newParent.findChildUnlocked(op.NewName, inode.isDir())
			if newNode != nil {
				// this file's been overwritten, it's
				// been detached but we can't delete
				// it just yet, because the kernel
				// will still send forget ops to us
				newParent.removeChildUnlocked(newNode)
				newNode.Parent = nil
			}

			// The inode's name now points at the NewName field of op.
			inode.Name = &op.NewName
			inode.Parent = newParent
			newParent.insertChildUnlocked(inode)
		}
	}
	return
}
  1147  
  1148  func (fs *Goofys) getMimeType(fileName string) (retMime *string) {
  1149  	if fs.flags.UseContentType {
  1150  		dotPosition := strings.LastIndex(fileName, ".")
  1151  		if dotPosition == -1 {
  1152  			return nil
  1153  		}
  1154  		mimeType := mime.TypeByExtension(fileName[dotPosition:])
  1155  		if mimeType == "" {
  1156  			return nil
  1157  		}
  1158  		semicolonPosition := strings.LastIndex(mimeType, ";")
  1159  		if semicolonPosition == -1 {
  1160  			return &mimeType
  1161  		}
  1162  		retMime = aws.String(mimeType[:semicolonPosition])
  1163  	}
  1164  
  1165  	return
  1166  }