github.com/VertebrateResequencing/muxfys@v3.0.5+incompatible/muxfys.go (about) 1 // Copyright © 2017, 2018 Genome Research Limited 2 // Author: Sendu Bala <sb10@sanger.ac.uk>. 3 // 4 // This file is part of muxfys. 5 // 6 // muxfys is free software: you can redistribute it and/or modify 7 // it under the terms of the GNU Lesser General Public License as published by 8 // the Free Software Foundation, either version 3 of the License, or 9 // (at your option) any later version. 10 // 11 // muxfys is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU Lesser General Public License for more details. 15 // 16 // You should have received a copy of the GNU Lesser General Public License 17 // along with muxfys. If not, see <http://www.gnu.org/licenses/>. 18 19 /* 20 Package muxfys is a pure Go library that lets you in-process temporarily 21 fuse-mount remote file systems or object stores as a "filey" system. Currently 22 only support for S3-like systems has been implemented. 23 24 It has high performance, and is easy to use with nothing else to install, and no 25 root permissions needed (except to initially install/configure fuse: on old 26 linux you may need to install fuse-utils, and for macOS you'll need to install 27 osxfuse; for both you must ensure that 'user_allow_other' is set in 28 /etc/fuse.conf or equivalent). 29 30 It allows "multiplexing": you can mount multiple different buckets (or sub 31 directories of the same bucket) on the same local directory. This makes commands 32 you want to run against the files in your buckets much simpler, eg. 
instead of
mounting s3://publicbucket, s3://myinputbucket and s3://myoutputbucket to
separate mount points and running:

    $ myexe -ref /mnt/publicbucket/refs/human/ref.fa -i /mnt/myinputbucket/xyz/123/
      input.file > /mnt/myoutputbucket/xyz/123/output.file

You could multiplex the 3 buckets (at the desired paths) on to the directory you
will work from and just run:

    $ myexe -ref ref.fa -i input.file > output.file

When using muxfys, you 1) mount, 2) do something that needs the files in your S3
bucket(s), 3) unmount. Then repeat 1-3 for other things that need data in your
S3 buckets.

# Usage

    import "github.com/VertebrateResequencing/muxfys"

    // fully manual S3 configuration
    accessorConfig := &muxfys.S3Config{
        Target:    "https://s3.amazonaws.com/mybucket/subdir",
        Region:    "us-east-1",
        AccessKey: os.Getenv("AWS_ACCESS_KEY_ID"),
        SecretKey: os.Getenv("AWS_SECRET_ACCESS_KEY"),
    }
    accessor, err := muxfys.NewS3Accessor(accessorConfig)
    if err != nil {
        log.Fatal(err)
    }
    remoteConfig1 := &muxfys.RemoteConfig{
        Accessor: accessor,
        CacheDir: "/tmp/muxfys/cache",
        Write:    true,
    }

    // or read configuration from standard AWS S3 config files and environment
    // variables
    accessorConfig, err = muxfys.S3ConfigFromEnvironment("default",
        "myotherbucket/another/subdir")
    if err != nil {
        log.Fatalf("could not read config from environment: %s\n", err)
    }
    accessor, err = muxfys.NewS3Accessor(accessorConfig)
    if err != nil {
        log.Fatal(err)
    }
    remoteConfig2 := &muxfys.RemoteConfig{
        Accessor:  accessor,
        CacheData: true,
    }

    cfg := &muxfys.Config{
        Mount:     "/tmp/muxfys/mount",
        CacheBase: "/tmp",
        Retries:   3,
        Verbose:   true,
    }

    fs, err := muxfys.New(cfg)
    if err != nil {
        log.Fatalf("bad configuration: %s\n", err)
    }

    err = fs.Mount(remoteConfig1, remoteConfig2)
    if err != nil {
        log.Fatalf("could not mount: %s\n", err)
    }

    fs.UnmountOnDeath()

    // read from & write to files in /tmp/muxfys/mount, which contains the
    // contents of mybucket/subdir and myotherbucket/another/subdir; writes will
    // get uploaded to mybucket/subdir when you Unmount()

    err = fs.Unmount()
    if err != nil {
        log.Fatalf("could not unmount: %s\n", err)
    }

    logs := fs.Logs()

# Extending

To add support for a new kind of remote file system or object store, simply
implement the RemoteAccessor interface and supply an instance of that to
RemoteConfig.
*/
package muxfys

import (
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"os/signal"
	"os/user"
	"path/filepath"
	"sort"
	"strconv"
	"sync"
	"syscall"
	"time"

	"github.com/hanwen/go-fuse/fuse"
	"github.com/hanwen/go-fuse/fuse/nodefs"
	"github.com/hanwen/go-fuse/fuse/pathfs"
	"github.com/inconshreveable/log15"
	"github.com/mitchellh/go-homedir"
	"github.com/sb10/l15h"
)

const (
	dirMode     = 0700         // permissions used for directories we create
	fileMode    = 0600         // permissions used for files we create
	dirSize     = uint64(4096) // size reported for all directories
	symlinkSize = uint64(7)    // size reported for symlinks
)

var (
	// logHandlerSetter lets SetLogHandler() change the logging destination for
	// the whole package at runtime; logs are discarded by default.
	logHandlerSetter = l15h.NewChanger(log15.DiscardHandler())
	pkgLogger        = log15.New("pkg", "muxfys")
	// exitFunc is an indirection over os.Exit so tests can intercept the exits
	// performed by UnmountOnDeath().
	exitFunc     = os.Exit
	deathSignals = []os.Signal{os.Interrupt, syscall.SIGTERM}
)

// init routes all package logging through the changeable handler so that
// SetLogHandler() affects loggers that were created before it was called.
func init() {
	pkgLogger.SetHandler(l15h.ChangeableHandler(logHandlerSetter))
}

// Config struct provides the configuration of a MuxFys.
type Config struct {
	// Mount is the local directory to mount on top of (muxfys will try to
	// create this if it doesn't exist). If not supplied, defaults to the
	// subdirectory "mnt" in the current working directory. Note that mounting
	// will only succeed if the Mount directory either doesn't exist or is
	// empty.
	Mount string

	// Retries is the number of times to automatically retry failed remote
	// system requests. The default of 0 means don't retry; at least 3 is
	// recommended.
	Retries int

	// CacheBase is the base directory that will be used to create cache
	// directories when a RemoteConfig that you Mount() has CacheData true but
	// CacheDir undefined. Defaults to the current working directory.
	CacheBase string

	// Verbose results in every remote request getting an entry in the output of
	// Logs(). Errors always appear there.
	Verbose bool
}

// MuxFys struct is the main filey system object.
type MuxFys struct {
	pathfs.FileSystem
	mountPoint string
	cacheBase  string
	dirAttr    *fuse.Attr
	server     *fuse.Server
	// mutex guards mount/unmount state; mapMutex guards the path-lookup maps
	// below, which are shared with the fuse callback goroutines.
	mutex           sync.Mutex
	mapMutex        sync.RWMutex
	dirs            map[string][]*remote
	dirContents     map[string][]fuse.DirEntry
	files           map[string]*fuse.Attr
	fileToRemote    map[string]*remote
	createdFiles    map[string]bool
	createdDirs     map[string]bool
	mounted         bool
	handlingSignals bool
	deathSignals    chan os.Signal
	ignoreSignals   chan bool
	remotes         []*remote
	writeRemote     *remote // at most one remote may be writeable
	maxAttempts     int
	logStore        *l15h.Store
	log15.Logger
}

// New returns a MuxFys that you'll use to Mount() your remote file systems or
// object stores, ensure you un-mount if killed by calling UnmountOnDeath(),
// then Unmount() when you're done. You might check Logs() afterwards. The other
// methods of MuxFys can be ignored in most cases.
216 func New(config *Config) (*MuxFys, error) { 217 mountPoint := config.Mount 218 if mountPoint == "" { 219 mountPoint = "mnt" 220 } 221 mountPoint, err := homedir.Expand(mountPoint) 222 if err != nil { 223 return nil, err 224 } 225 mountPoint, err = filepath.Abs(mountPoint) 226 if err != nil { 227 return nil, err 228 } 229 230 // create mount point if necessary 231 err = os.MkdirAll(mountPoint, os.FileMode(dirMode)) 232 if err != nil { 233 return nil, err 234 } 235 236 // check that it's empty 237 entries, err := ioutil.ReadDir(mountPoint) 238 if err != nil { 239 return nil, err 240 } 241 if len(entries) > 0 { 242 return nil, fmt.Errorf("Mount directory %s was not empty", mountPoint) 243 } 244 245 cacheBase := config.CacheBase 246 if cacheBase == "" { 247 cacheBase, err = os.Getwd() 248 if err != nil { 249 return nil, err 250 } 251 } 252 253 // make a logger with context for us, that will store log messages in memory 254 // but is also capable of logging anywhere the user wants via 255 // SetLogHandler() 256 logger := pkgLogger.New("mount", mountPoint) 257 store := l15h.NewStore() 258 logLevel := log15.LvlError 259 if config.Verbose { 260 logLevel = log15.LvlInfo 261 } 262 l15h.AddHandler(logger, log15.LvlFilterHandler(logLevel, l15h.CallerInfoHandler(l15h.StoreHandler(store, log15.LogfmtFormat())))) 263 264 // initialize ourselves 265 fs := &MuxFys{ 266 FileSystem: pathfs.NewDefaultFileSystem(), 267 mountPoint: mountPoint, 268 cacheBase: cacheBase, 269 dirs: make(map[string][]*remote), 270 dirContents: make(map[string][]fuse.DirEntry), 271 files: make(map[string]*fuse.Attr), 272 fileToRemote: make(map[string]*remote), 273 createdFiles: make(map[string]bool), 274 createdDirs: make(map[string]bool), 275 maxAttempts: config.Retries + 1, 276 logStore: store, 277 Logger: logger, 278 } 279 280 // we'll always use the same attributes for our directories 281 mTime := uint64(time.Now().Unix()) 282 fs.dirAttr = &fuse.Attr{ 283 Size: dirSize, 284 Mode: fuse.S_IFDIR | 
uint32(dirMode), 285 Mtime: mTime, 286 Atime: mTime, 287 Ctime: mTime, 288 } 289 290 return fs, err 291 } 292 293 // Mount carries out the mounting of your supplied RemoteConfigs to your 294 // configured mount point. On return, the files in your remote(s) will be 295 // accessible. 296 // 297 // Once mounted, you can't mount again until you Unmount(). 298 // 299 // If more than 1 RemoteConfig is supplied, the remotes will become multiplexed: 300 // your mount point will show the combined contents of all your remote systems. 301 // If multiple remotes have a directory with the same name, that directory's 302 // contents will in in turn show the contents of all those directories. If 303 // multiple remotes have a file with the same name in the same directory, reads 304 // will come from the first remote you configured that has that file. 305 func (fs *MuxFys) Mount(rcs ...*RemoteConfig) error { 306 if len(rcs) == 0 { 307 return fmt.Errorf("At least one RemoteConfig must be supplied") 308 } 309 310 fs.mutex.Lock() 311 defer fs.mutex.Unlock() 312 if fs.mounted { 313 return fmt.Errorf("Can't mount more that once at a time") 314 } 315 316 // create a remote for every RemoteConfig 317 for _, c := range rcs { 318 r, err := newRemote(c.Accessor, c.CacheData, c.CacheDir, fs.cacheBase, c.Write, fs.maxAttempts, fs.Logger) 319 if err != nil { 320 return err 321 } 322 323 fs.remotes = append(fs.remotes, r) 324 if r.write { 325 if fs.writeRemote != nil { 326 return fmt.Errorf("You can't have more than one writeable remote") 327 } 328 fs.writeRemote = r 329 } 330 } 331 332 uid, gid, err := userAndGroup() 333 if err != nil { 334 return err 335 } 336 337 opts := &nodefs.Options{ 338 NegativeTimeout: time.Second, 339 AttrTimeout: time.Second, 340 EntryTimeout: time.Second, 341 Owner: &fuse.Owner{ 342 Uid: uid, 343 Gid: gid, 344 }, 345 Debug: false, 346 } 347 pathFsOpts := &pathfs.PathNodeFsOptions{ClientInodes: false} // false means we can't hardlink, but our inodes are stable *** 
does it matter if they're unstable? 348 pathFs := pathfs.NewPathNodeFs(fs, pathFsOpts) 349 conn := nodefs.NewFileSystemConnector(pathFs.Root(), opts) 350 mOpts := &fuse.MountOptions{ 351 AllowOther: true, 352 FsName: "MuxFys", 353 Name: "MuxFys", 354 RememberInodes: true, 355 DisableXAttrs: true, 356 IgnoreSecurityLabels: true, 357 Debug: false, 358 } 359 fs.server, err = fuse.NewServer(conn.RawFS(), fs.mountPoint, mOpts) 360 if err != nil { 361 return err 362 } 363 364 go fs.server.Serve() 365 err = fs.server.WaitMount() 366 if err != nil { 367 return err 368 } 369 370 fs.mounted = true 371 return err 372 } 373 374 // userAndGroup returns the current uid and gid; we only ever mount with dir and 375 // file permissions for the current user. 376 func userAndGroup() (uid uint32, gid uint32, err error) { 377 user, err := user.Current() 378 if err != nil { 379 return uid, gid, err 380 } 381 382 uid64, err := strconv.ParseInt(user.Uid, 10, 32) 383 if err != nil { 384 return uid, gid, err 385 } 386 387 gid64, err := strconv.ParseInt(user.Gid, 10, 32) 388 if err != nil { 389 return uid, gid, err 390 } 391 392 return uint32(uid64), uint32(gid64), err 393 } 394 395 // UnmountOnDeath captures SIGINT (ctrl-c) and SIGTERM (kill) signals, then 396 // calls Unmount() before calling os.Exit(1 if the unmount worked, 2 otherwise) 397 // to terminate your program. Manually calling Unmount() after this cancels the 398 // signal capture. This does NOT block. 399 func (fs *MuxFys) UnmountOnDeath() { 400 fs.mutex.Lock() 401 defer fs.mutex.Unlock() 402 if !fs.mounted || fs.handlingSignals { 403 return 404 } 405 406 fs.deathSignals = make(chan os.Signal, 2) 407 signal.Notify(fs.deathSignals, deathSignals...) 
408 fs.handlingSignals = true 409 fs.ignoreSignals = make(chan bool) 410 411 go func() { 412 select { 413 case <-fs.ignoreSignals: 414 signal.Stop(fs.deathSignals) 415 fs.mutex.Lock() 416 fs.handlingSignals = false 417 fs.mutex.Unlock() 418 return 419 case <-fs.deathSignals: 420 fs.mutex.Lock() 421 fs.handlingSignals = false 422 fs.mutex.Unlock() 423 err := fs.Unmount() 424 if err != nil { 425 fs.Error("Failed to unmount on death", "err", err) 426 exitFunc(2) 427 return 428 } 429 exitFunc(1) 430 return 431 } 432 }() 433 } 434 435 // Unmount must be called when you're done reading from/ writing to your 436 // remotes. Be sure to close any open filehandles before hand! 437 // 438 // It's a good idea to defer this after calling Mount(), and possibly also call 439 // UnmountOnDeath(). 440 // 441 // In CacheData mode, it is only at Unmount() that any files you created or 442 // altered get uploaded, so this may take some time. You can optionally supply a 443 // bool which if true prevents any uploads. 444 // 445 // If a remote was not configured with a specific CacheDir but CacheData was 446 // true, the CacheDir will be deleted. 
func (fs *MuxFys) Unmount(doNotUpload ...bool) error {
	fs.mutex.Lock()
	defer fs.mutex.Unlock()

	// cancel the UnmountOnDeath() signal-handling goroutine, if running
	if fs.handlingSignals {
		fs.ignoreSignals <- true
	}

	var err error
	if fs.mounted {
		err = fs.server.Unmount()
		if err == nil {
			fs.mounted = false
		}
		// <-time.After(10 * time.Second)
	}

	if !(len(doNotUpload) == 1 && doNotUpload[0]) {
		// upload files that got opened for writing; any upload error is
		// combined with any unmount error so neither is lost
		uerr := fs.uploadCreated()
		if uerr != nil {
			if err == nil {
				err = uerr
			} else {
				err = fmt.Errorf("%s; %s", err.Error(), uerr.Error())
			}
		}
	}

	// delete any cachedirs we created (only those we made ourselves; a
	// user-specified CacheDir is left alone); failure is logged, not returned
	for _, remote := range fs.remotes {
		if remote.cacheIsTmp {
			errd := remote.deleteCache()
			if errd != nil {
				remote.Warn("Unmount cache deletion failed", "err", errd)
				// *** this can fail on nfs due to "device or resource busy",
				// but retrying doesn't help. Waiting 10s immediately before or
				// after a failure also doesn't help; you have to always wait
				// 10s after fs.server.Unmount() to be able to delete the cache!
			}
		}
	}

	// clean out our caches; one reason to unmount is to force recognition of
	// new files when we re-mount
	fs.mapMutex.Lock()
	fs.dirs = make(map[string][]*remote)
	fs.dirContents = make(map[string][]fuse.DirEntry)
	fs.files = make(map[string]*fuse.Attr)
	fs.fileToRemote = make(map[string]*remote)
	fs.createdFiles = make(map[string]bool)
	fs.createdDirs = make(map[string]bool)
	fs.mapMutex.Unlock()

	// forget our remotes so we can be remounted with other remotes
	fs.remotes = nil
	fs.writeRemote = nil

	return err
}

// uploadCreated uploads any files that previously got created. Only does
// anything in CacheData mode with a writeable remote.
func (fs *MuxFys) uploadCreated() error {
	if fs.writeRemote != nil && fs.writeRemote.cacheData {
		fails := 0

		// since mtimes in S3 are stored as the upload time, we sort our created
		// files by their mtime to at least upload them in the correct order
		var createdFiles []string
		fs.mapMutex.Lock()
		for name := range fs.createdFiles {
			createdFiles = append(createdFiles, name)
		}
		if len(createdFiles) > 1 {
			sort.Slice(createdFiles, func(i, j int) bool {
				return fs.files[createdFiles[i]].Mtime < fs.files[createdFiles[j]].Mtime
			})
		}

		// NOTE(review): mapMutex stays held for the duration of the uploads,
		// blocking fuse callbacks that need the maps until we're done
		for _, name := range createdFiles {
			remotePath := fs.writeRemote.getRemotePath(name)
			localPath := fs.writeRemote.getLocalPath(remotePath)

			// upload file; on failure keep the entry in createdFiles so a
			// later call could retry it
			status := fs.writeRemote.uploadFile(localPath, remotePath)
			if status != fuse.OK {
				fails++
				continue
			}

			delete(fs.createdFiles, name)
		}
		fs.mapMutex.Unlock()

		if fails > 0 {
			return fmt.Errorf("failed to upload %d files", fails)
		}
	}
	return nil
}

// Logs returns messages generated while mounted; you might call it after
// Unmount() to see how things went.
//
// By default these will only be errors that occurred, but if this MuxFys was
// configured with Verbose on, it will also contain informational and warning
// messages.
//
// If the muxfys package was configured with a log Handler (see
// SetLogHandler()), these same messages would have been logged as they
// occurred.
func (fs *MuxFys) Logs() []string {
	return fs.logStore.Logs()
}

// SetLogHandler defines how log messages (globally for this package) are
// logged. Logs are always retrievable as strings from individual MuxFys
// instances using MuxFys.Logs(), but otherwise by default are discarded.
//
// To have them logged somewhere as they are emitted, supply a
// github.com/inconshreveable/log15.Handler. For example, supplying
For example, supplying 569 // log15.StderrHandler would log everything to STDERR. 570 func SetLogHandler(h log15.Handler) { 571 logHandlerSetter.SetHandler(h) 572 } 573 574 // logClose is for use to Close() an object during a defer when you don't care 575 // if the Close() returns an error, but do want non-EOF errors logged. Extra 576 // args are passed as additional context for the logger. 577 func logClose(logger log15.Logger, obj io.Closer, msg string, extra ...interface{}) { 578 err := obj.Close() 579 if err != nil && err.Error() != "EOF" { 580 extra = append(extra, "err", err) 581 logger.Warn("failed to close "+msg, extra...) 582 } 583 }