github.com/VertebrateResequencing/muxfys@v3.0.5+incompatible/muxfys.go

// Copyright © 2017, 2018 Genome Research Limited
// Author: Sendu Bala <sb10@sanger.ac.uk>.
//
//  This file is part of muxfys.
//
//  muxfys is free software: you can redistribute it and/or modify
//  it under the terms of the GNU Lesser General Public License as published by
//  the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
//  muxfys is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU Lesser General Public License for more details.
//
//  You should have received a copy of the GNU Lesser General Public License
//  along with muxfys. If not, see <http://www.gnu.org/licenses/>.

/*
Package muxfys is a pure Go library that lets you temporarily fuse-mount remote
file systems or object stores in-process as a "filey" system. Currently only
S3-like systems are supported.

It has high performance and is easy to use, with nothing else to install and no
root permissions needed (except to initially install/configure fuse: on old
Linux you may need to install fuse-utils, and on macOS you'll need to install
osxfuse; for both you must ensure that 'user_allow_other' is set in
/etc/fuse.conf or equivalent).

It allows "multiplexing": you can mount multiple different buckets (or sub
directories of the same bucket) on the same local directory. This makes commands
you want to run against the files in your buckets much simpler, e.g. instead of
mounting s3://publicbucket, s3://myinputbucket and s3://myoutputbucket to
separate mount points and running:

 $ myexe -ref /mnt/publicbucket/refs/human/ref.fa -i /mnt/myinputbucket/xyz/123/
   input.file > /mnt/myoutputbucket/xyz/123/output.file

you could multiplex the 3 buckets (at the desired paths) onto the directory you
will work from and just run:

 $ myexe -ref ref.fa -i input.file > output.file

When using muxfys, you 1) mount, 2) do something that needs the files in your S3
bucket(s), 3) unmount. Then repeat 1-3 for other things that need data in your
S3 buckets.

# Usage

    import "github.com/VertebrateResequencing/muxfys"

    // fully manual S3 configuration
    accessorConfig := &muxfys.S3Config{
        Target:    "https://s3.amazonaws.com/mybucket/subdir",
        Region:    "us-east-1",
        AccessKey: os.Getenv("AWS_ACCESS_KEY_ID"),
        SecretKey: os.Getenv("AWS_SECRET_ACCESS_KEY"),
    }
    accessor, err := muxfys.NewS3Accessor(accessorConfig)
    if err != nil {
        log.Fatal(err)
    }
    remoteConfig1 := &muxfys.RemoteConfig{
        Accessor: accessor,
        CacheDir: "/tmp/muxfys/cache",
        Write:    true,
    }

    // or read configuration from standard AWS S3 config files and environment
    // variables
    accessorConfig, err = muxfys.S3ConfigFromEnvironment("default",
        "myotherbucket/another/subdir")
    if err != nil {
        log.Fatalf("could not read config from environment: %s\n", err)
    }
    accessor, err = muxfys.NewS3Accessor(accessorConfig)
    if err != nil {
        log.Fatal(err)
    }
    remoteConfig2 := &muxfys.RemoteConfig{
        Accessor:  accessor,
        CacheData: true,
    }

    cfg := &muxfys.Config{
        Mount:     "/tmp/muxfys/mount",
        CacheBase: "/tmp",
        Retries:   3,
        Verbose:   true,
    }

    fs, err := muxfys.New(cfg)
    if err != nil {
        log.Fatalf("bad configuration: %s\n", err)
    }

    err = fs.Mount(remoteConfig1, remoteConfig2)
    if err != nil {
        log.Fatalf("could not mount: %s\n", err)
    }
    fs.UnmountOnDeath()

    // read from & write to files in /tmp/muxfys/mount, which contains the
    // contents of mybucket/subdir and myotherbucket/another/subdir; writes will
    // get uploaded to mybucket/subdir when you Unmount()

    err = fs.Unmount()
    if err != nil {
        log.Fatalf("could not unmount: %s\n", err)
    }

    logs := fs.Logs()

# Extending

To add support for a new kind of remote file system or object store, implement
the RemoteAccessor interface and supply an instance of that to RemoteConfig.
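
As a rough sketch (the exact method set comes from the RemoteAccessor interface
defined elsewhere in this package; the type and names below are purely
illustrative):

    // MyStoreAccessor knows how to talk to your particular remote system
    type MyStoreAccessor struct {
        // client/connection details for your remote system
    }

    // ... implement each RemoteAccessor method on *MyStoreAccessor ...

    // then mount it like any other remote
    err = fs.Mount(&muxfys.RemoteConfig{Accessor: &MyStoreAccessor{}})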
*/
package muxfys

import (
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"os/signal"
	"os/user"
	"path/filepath"
	"sort"
	"strconv"
	"sync"
	"syscall"
	"time"

	"github.com/hanwen/go-fuse/fuse"
	"github.com/hanwen/go-fuse/fuse/nodefs"
	"github.com/hanwen/go-fuse/fuse/pathfs"
	"github.com/inconshreveable/log15"
	"github.com/mitchellh/go-homedir"
	"github.com/sb10/l15h"
)

const (
	dirMode     = 0700
	fileMode    = 0600
	dirSize     = uint64(4096)
	symlinkSize = uint64(7)
)

var (
	logHandlerSetter = l15h.NewChanger(log15.DiscardHandler())
	pkgLogger        = log15.New("pkg", "muxfys")
	exitFunc         = os.Exit
	deathSignals     = []os.Signal{os.Interrupt, syscall.SIGTERM}
)

func init() {
	pkgLogger.SetHandler(l15h.ChangeableHandler(logHandlerSetter))
}

// Config struct provides the configuration of a MuxFys.
type Config struct {
	// Mount is the local directory to mount on top of (muxfys will try to
	// create this if it doesn't exist). If not supplied, defaults to the
	// subdirectory "mnt" in the current working directory. Note that mounting
	// will only succeed if the Mount directory either doesn't exist or is
	// empty.
	Mount string

	// Retries is the number of times to automatically retry failed remote
	// system requests. The default of 0 means don't retry; at least 3 is
	// recommended.
	Retries int

	// CacheBase is the base directory that will be used to create cache
	// directories when a RemoteConfig that you Mount() has CacheData true but
	// CacheDir undefined. Defaults to the current working directory.
	CacheBase string

	// Verbose results in every remote request getting an entry in the output of
	// Logs(). Errors always appear there.
	Verbose bool
}

// MuxFys struct is the main filey system object.
type MuxFys struct {
	pathfs.FileSystem
	mountPoint      string
	cacheBase       string
	dirAttr         *fuse.Attr
	server          *fuse.Server
	mutex           sync.Mutex
	mapMutex        sync.RWMutex
	dirs            map[string][]*remote
	dirContents     map[string][]fuse.DirEntry
	files           map[string]*fuse.Attr
	fileToRemote    map[string]*remote
	createdFiles    map[string]bool
	createdDirs     map[string]bool
	mounted         bool
	handlingSignals bool
	deathSignals    chan os.Signal
	ignoreSignals   chan bool
	remotes         []*remote
	writeRemote     *remote
	maxAttempts     int
	logStore        *l15h.Store
	log15.Logger
}

// New returns a MuxFys that you'll use to Mount() your remote file systems or
// object stores; ensure you un-mount if killed by calling UnmountOnDeath(),
// then Unmount() when you're done. You might check Logs() afterwards. The other
// methods of MuxFys can be ignored in most cases.
func New(config *Config) (*MuxFys, error) {
	mountPoint := config.Mount
	if mountPoint == "" {
		mountPoint = "mnt"
	}
	mountPoint, err := homedir.Expand(mountPoint)
	if err != nil {
		return nil, err
	}
	mountPoint, err = filepath.Abs(mountPoint)
	if err != nil {
		return nil, err
	}

	// create mount point if necessary
	err = os.MkdirAll(mountPoint, os.FileMode(dirMode))
	if err != nil {
		return nil, err
	}

	// check that it's empty
	entries, err := ioutil.ReadDir(mountPoint)
	if err != nil {
		return nil, err
	}
	if len(entries) > 0 {
		return nil, fmt.Errorf("Mount directory %s was not empty", mountPoint)
	}

	cacheBase := config.CacheBase
	if cacheBase == "" {
		cacheBase, err = os.Getwd()
		if err != nil {
			return nil, err
		}
	}

	// make a logger with context for us, that will store log messages in memory
	// but is also capable of logging anywhere the user wants via
	// SetLogHandler()
	logger := pkgLogger.New("mount", mountPoint)
	store := l15h.NewStore()
	logLevel := log15.LvlError
	if config.Verbose {
		logLevel = log15.LvlInfo
	}
	l15h.AddHandler(logger, log15.LvlFilterHandler(logLevel, l15h.CallerInfoHandler(l15h.StoreHandler(store, log15.LogfmtFormat()))))

	// initialize ourselves
	fs := &MuxFys{
		FileSystem:   pathfs.NewDefaultFileSystem(),
		mountPoint:   mountPoint,
		cacheBase:    cacheBase,
		dirs:         make(map[string][]*remote),
		dirContents:  make(map[string][]fuse.DirEntry),
		files:        make(map[string]*fuse.Attr),
		fileToRemote: make(map[string]*remote),
		createdFiles: make(map[string]bool),
		createdDirs:  make(map[string]bool),
		maxAttempts:  config.Retries + 1,
		logStore:     store,
		Logger:       logger,
	}

	// we'll always use the same attributes for our directories
	mTime := uint64(time.Now().Unix())
	fs.dirAttr = &fuse.Attr{
		Size:  dirSize,
		Mode:  fuse.S_IFDIR | uint32(dirMode),
		Mtime: mTime,
		Atime: mTime,
		Ctime: mTime,
	}

	return fs, err
}

// Mount carries out the mounting of your supplied RemoteConfigs to your
// configured mount point. On return, the files in your remote(s) will be
// accessible.
//
// Once mounted, you can't mount again until you Unmount().
//
// If more than 1 RemoteConfig is supplied, the remotes will become multiplexed:
// your mount point will show the combined contents of all your remote systems.
// If multiple remotes have a directory with the same name, that directory's
// contents will in turn show the contents of all those directories. If
// multiple remotes have a file with the same name in the same directory, reads
// will come from the first remote you configured that has that file.
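//
// For example (using remoteConfig1 and remoteConfig2 from the package usage
// example above), a file present in both remotes is read from remoteConfig1's
// remote, because it was supplied first:
//
//	err = fs.Mount(remoteConfig1, remoteConfig2)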
func (fs *MuxFys) Mount(rcs ...*RemoteConfig) error {
	if len(rcs) == 0 {
		return fmt.Errorf("At least one RemoteConfig must be supplied")
	}

	fs.mutex.Lock()
	defer fs.mutex.Unlock()
	if fs.mounted {
		return fmt.Errorf("Can't mount more than once at a time")
	}

	// create a remote for every RemoteConfig
	for _, c := range rcs {
		r, err := newRemote(c.Accessor, c.CacheData, c.CacheDir, fs.cacheBase, c.Write, fs.maxAttempts, fs.Logger)
		if err != nil {
			return err
		}

		fs.remotes = append(fs.remotes, r)
		if r.write {
			if fs.writeRemote != nil {
				return fmt.Errorf("You can't have more than one writeable remote")
			}
			fs.writeRemote = r
		}
	}

	uid, gid, err := userAndGroup()
	if err != nil {
		return err
	}

	opts := &nodefs.Options{
		NegativeTimeout: time.Second,
		AttrTimeout:     time.Second,
		EntryTimeout:    time.Second,
		Owner: &fuse.Owner{
			Uid: uid,
			Gid: gid,
		},
		Debug: false,
	}
	pathFsOpts := &pathfs.PathNodeFsOptions{ClientInodes: false} // false means we can't hardlink, but our inodes are stable *** does it matter if they're unstable?
	pathFs := pathfs.NewPathNodeFs(fs, pathFsOpts)
	conn := nodefs.NewFileSystemConnector(pathFs.Root(), opts)
	mOpts := &fuse.MountOptions{
		AllowOther:           true,
		FsName:               "MuxFys",
		Name:                 "MuxFys",
		RememberInodes:       true,
		DisableXAttrs:        true,
		IgnoreSecurityLabels: true,
		Debug:                false,
	}
	fs.server, err = fuse.NewServer(conn.RawFS(), fs.mountPoint, mOpts)
	if err != nil {
		return err
	}

	go fs.server.Serve()
	err = fs.server.WaitMount()
	if err != nil {
		return err
	}

	fs.mounted = true
	return err
}

// userAndGroup returns the current uid and gid; we only ever mount with dir and
// file permissions for the current user.
func userAndGroup() (uid uint32, gid uint32, err error) {
	user, err := user.Current()
	if err != nil {
		return uid, gid, err
	}

	uid64, err := strconv.ParseInt(user.Uid, 10, 32)
	if err != nil {
		return uid, gid, err
	}

	gid64, err := strconv.ParseInt(user.Gid, 10, 32)
	if err != nil {
		return uid, gid, err
	}

	return uint32(uid64), uint32(gid64), err
}

// UnmountOnDeath captures SIGINT (ctrl-c) and SIGTERM (kill) signals, then
// calls Unmount() before calling os.Exit(1 if the unmount worked, 2 otherwise)
// to terminate your program. Manually calling Unmount() after this cancels the
// signal capture. This does NOT block.
func (fs *MuxFys) UnmountOnDeath() {
	fs.mutex.Lock()
	defer fs.mutex.Unlock()
	if !fs.mounted || fs.handlingSignals {
		return
	}

	fs.deathSignals = make(chan os.Signal, 2)
	signal.Notify(fs.deathSignals, deathSignals...)
	fs.handlingSignals = true
	fs.ignoreSignals = make(chan bool)

	go func() {
		select {
		case <-fs.ignoreSignals:
			signal.Stop(fs.deathSignals)
			fs.mutex.Lock()
			fs.handlingSignals = false
			fs.mutex.Unlock()
			return
		case <-fs.deathSignals:
			fs.mutex.Lock()
			fs.handlingSignals = false
			fs.mutex.Unlock()
			err := fs.Unmount()
			if err != nil {
				fs.Error("Failed to unmount on death", "err", err)
				exitFunc(2)
				return
			}
			exitFunc(1)
			return
		}
	}()
}

// Unmount must be called when you're done reading from/writing to your
// remotes. Be sure to close any open filehandles beforehand!
//
// It's a good idea to defer this after calling Mount(), and possibly also call
// UnmountOnDeath().
//
// In CacheData mode, it is only at Unmount() that any files you created or
// altered get uploaded, so this may take some time. You can optionally supply a
// bool which if true prevents any uploads.
//
// If a remote was not configured with a specific CacheDir but CacheData was
// true, the CacheDir will be deleted.
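//
// For example, to unmount without uploading anything that was written:
//
//	err = fs.Unmount(true)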
func (fs *MuxFys) Unmount(doNotUpload ...bool) error {
	fs.mutex.Lock()
	defer fs.mutex.Unlock()

	if fs.handlingSignals {
		fs.ignoreSignals <- true
	}

	var err error
	if fs.mounted {
		err = fs.server.Unmount()
		if err == nil {
			fs.mounted = false
		}
		// <-time.After(10 * time.Second)
	}

	if !(len(doNotUpload) == 1 && doNotUpload[0]) {
		// upload files that got opened for writing
		uerr := fs.uploadCreated()
		if uerr != nil {
			if err == nil {
				err = uerr
			} else {
				err = fmt.Errorf("%s; %s", err.Error(), uerr.Error())
			}
		}
	}

	// delete any cachedirs we created
	for _, remote := range fs.remotes {
		if remote.cacheIsTmp {
			errd := remote.deleteCache()
			if errd != nil {
				remote.Warn("Unmount cache deletion failed", "err", errd)
				// *** this can fail on nfs due to "device or resource busy",
				// but retrying doesn't help. Waiting 10s immediately before or
				// after a failure also doesn't help; you have to always wait
				// 10s after fs.server.Unmount() to be able to delete the cache!
			}
		}
	}

	// clean out our caches; one reason to unmount is to force recognition of
	// new files when we re-mount
	fs.mapMutex.Lock()
	fs.dirs = make(map[string][]*remote)
	fs.dirContents = make(map[string][]fuse.DirEntry)
	fs.files = make(map[string]*fuse.Attr)
	fs.fileToRemote = make(map[string]*remote)
	fs.createdFiles = make(map[string]bool)
	fs.createdDirs = make(map[string]bool)
	fs.mapMutex.Unlock()

	// forget our remotes so we can be remounted with other remotes
	fs.remotes = nil
	fs.writeRemote = nil

	return err
}

// uploadCreated uploads any files that previously got created. It only does
// anything in CacheData mode.
func (fs *MuxFys) uploadCreated() error {
	if fs.writeRemote != nil && fs.writeRemote.cacheData {
		fails := 0

		// since mtimes in S3 are stored as the upload time, we sort our created
		// files by their mtime to at least upload them in the correct order
		var createdFiles []string
		fs.mapMutex.Lock()
		for name := range fs.createdFiles {
			createdFiles = append(createdFiles, name)
		}
		if len(createdFiles) > 1 {
			sort.Slice(createdFiles, func(i, j int) bool {
				return fs.files[createdFiles[i]].Mtime < fs.files[createdFiles[j]].Mtime
			})
		}

		for _, name := range createdFiles {
			remotePath := fs.writeRemote.getRemotePath(name)
			localPath := fs.writeRemote.getLocalPath(remotePath)

			// upload file
			status := fs.writeRemote.uploadFile(localPath, remotePath)
			if status != fuse.OK {
				fails++
				continue
			}

			delete(fs.createdFiles, name)
		}
		fs.mapMutex.Unlock()

		if fails > 0 {
			return fmt.Errorf("failed to upload %d files", fails)
		}
	}
	return nil
}

// Logs returns messages generated while mounted; you might call it after
// Unmount() to see how things went.
//
// By default these will only be errors that occurred, but if this MuxFys was
// configured with Verbose on, it will also contain informational and warning
// messages.
//
// If the muxfys package was configured with a log Handler (see
// SetLogHandler()), these same messages would have been logged as they
// occurred.
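//
// For example, to print everything that was logged:
//
//	for _, msg := range fs.Logs() {
//		fmt.Println(msg)
//	}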
func (fs *MuxFys) Logs() []string {
	return fs.logStore.Logs()
}

// SetLogHandler defines how log messages (globally for this package) are
// logged. Logs are always retrievable as strings from individual MuxFys
// instances using MuxFys.Logs(), but otherwise by default are discarded.
//
// To have them logged somewhere as they are emitted, supply a
// github.com/inconshreveable/log15.Handler. For example, supplying
// log15.StderrHandler would log everything to STDERR.
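//
//	muxfys.SetLogHandler(log15.StderrHandler)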
func SetLogHandler(h log15.Handler) {
	logHandlerSetter.SetHandler(h)
}

// logClose is used to Close() an object during a defer when you don't care
// whether Close() returns an error, but do want non-EOF errors logged. Extra
// args are passed as additional context for the logger.
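//
// A hypothetical use, where f is an *os.File opened earlier and path is where
// it lives:
//
//	defer logClose(fs.Logger, f, "cache file", "path", path)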
func logClose(logger log15.Logger, obj io.Closer, msg string, extra ...interface{}) {
	err := obj.Close()
	if err != nil && err.Error() != "EOF" {
		extra = append(extra, "err", err)
		logger.Warn("failed to close "+msg, extra...)
	}
}