github.com/sdboyer/gps@v0.16.3/source_manager.go (about)

     1  package gps
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"os/signal"
     8  	"path/filepath"
     9  	"runtime"
    10  	"strings"
    11  	"sync"
    12  	"sync/atomic"
    13  	"time"
    14  
    15  	"github.com/sdboyer/constext"
    16  	"github.com/sdboyer/gps/pkgtree"
    17  )
    18  
    19  // Used to compute a friendly filepath from a URL-shaped input.
    20  var sanitizer = strings.NewReplacer("-", "--", ":", "-", "/", "-", "+", "-")
    21  
    22  // A SourceManager is responsible for retrieving, managing, and interrogating
    23  // source repositories. Its primary purpose is to serve the needs of a Solver,
    24  // but it is handy for other purposes, as well.
    25  //
    26  // gps's built-in SourceManager, SourceMgr, is intended to be generic and
    27  // sufficient for any purpose. It provides some additional semantics around the
    28  // methods defined here.
    29  type SourceManager interface {
    30  	// SourceExists checks if a repository exists, either upstream or in the
    31  	// SourceManager's central repository cache.
    32  	SourceExists(ProjectIdentifier) (bool, error)
    33  
    34  	// SyncSourceFor will attempt to bring all local information about a source
    35  	// fully up to date.
    36  	SyncSourceFor(ProjectIdentifier) error
    37  
    38  	// ListVersions retrieves a list of the available versions for a given
    39  	// repository name.
    40  	// TODO convert to []PairedVersion
    41  	ListVersions(ProjectIdentifier) ([]PairedVersion, error)
    42  
    43  	// RevisionPresentIn indicates whether the provided Version is present in
    44  	// the given repository.
    45  	RevisionPresentIn(ProjectIdentifier, Revision) (bool, error)
    46  
    47  	// ListPackages parses the tree of the Go packages at or below root of the
    48  	// provided ProjectIdentifier, at the provided version.
    49  	ListPackages(ProjectIdentifier, Version) (pkgtree.PackageTree, error)
    50  
    51  	// GetManifestAndLock returns manifest and lock information for the provided
    52  	// root import path.
    53  	//
    54  	// gps currently requires that projects be rooted at their repository root,
    55  	// necessitating that the ProjectIdentifier's ProjectRoot must also be a
    56  	// repository root.
    57  	GetManifestAndLock(ProjectIdentifier, Version, ProjectAnalyzer) (Manifest, Lock, error)
    58  
    59  	// ExportProject writes out the tree of the provided import path, at the
    60  	// provided version, to the provided directory.
    61  	ExportProject(ProjectIdentifier, Version, string) error
    62  
    63  	// DeduceRootProject takes an import path and deduces the corresponding
    64  	// project/source root.
    65  	DeduceProjectRoot(ip string) (ProjectRoot, error)
    66  
    67  	// Release lets go of any locks held by the SourceManager. Once called, it is
    68  	// no longer safe to call methods against it; all method calls will
    69  	// immediately result in errors.
    70  	Release()
    71  }
    72  
// A ProjectAnalyzer is responsible for analyzing a given path for Manifest and
// Lock information. Tools relying on gps must implement one.
type ProjectAnalyzer interface {
	// DeriveManifestAndLock performs analysis of the filesystem tree rooted at
	// path, with the root import path importRoot, to determine the project's
	// constraints, as indicated by a Manifest and Lock.
	DeriveManifestAndLock(path string, importRoot ProjectRoot) (Manifest, Lock, error)

	// Info reports the name and version of this ProjectAnalyzer.
	Info() (name string, version int)
}
    84  
// SourceMgr is the default SourceManager for gps.
//
// There's no (planned) reason why it would need to be reimplemented by other
// tools; control via dependency injection is intended to be sufficient.
//
// A SourceMgr should be obtained from NewSourceManager, which creates the
// on-disk lock file and wires up the subsystems referenced below.
type SourceMgr struct {
	cachedir    string                // path to root of cache dir
	lf          *os.File              // handle for the sm lock file ("sm.lock" under cachedir)
	suprvsr     *supervisor           // subsystem that supervises running calls/io
	cancelAll   context.CancelFunc    // cancel func to kill all running work
	deduceCoord *deductionCoordinator // subsystem that manages import path deduction
	srcCoord    *sourceCoordinator    // subsystem that manages sources
	sigmut      sync.Mutex            // mutex protecting signal handling setup/teardown
	qch         chan struct{}         // quit chan for signal handler; closed to stop the handler
	relonce     sync.Once             // once-er to ensure we only release once
	releasing   int32                 // accessed atomically; 1 once release of the sm has begun
}
   101  
   102  type smIsReleased struct{}
   103  
   104  func (smIsReleased) Error() string {
   105  	return "this SourceMgr has been released, its methods can no longer be called"
   106  }
   107  
   108  var _ SourceManager = &SourceMgr{}
   109  
   110  // NewSourceManager produces an instance of gps's built-in SourceManager. It
   111  // takes a cache directory, where local instances of upstream sources are
   112  // stored.
   113  //
   114  // The returned SourceManager aggressively caches information wherever possible.
   115  // If tools need to do preliminary work involving upstream repository analysis
   116  // prior to invoking a solve run, it is recommended that they create this
   117  // SourceManager as early as possible and use it to their ends. That way, the
   118  // solver can benefit from any caches that may have already been warmed.
   119  //
   120  // gps's SourceManager is intended to be threadsafe (if it's not, please file a
   121  // bug!). It should be safe to reuse across concurrent solving runs, even on
   122  // unrelated projects.
   123  func NewSourceManager(cachedir string) (*SourceMgr, error) {
   124  	err := os.MkdirAll(filepath.Join(cachedir, "sources"), 0777)
   125  	if err != nil {
   126  		return nil, err
   127  	}
   128  
   129  	glpath := filepath.Join(cachedir, "sm.lock")
   130  	_, err = os.Stat(glpath)
   131  	if err == nil {
   132  		return nil, CouldNotCreateLockError{
   133  			Path: glpath,
   134  			Err:  fmt.Errorf("cache lock file %s exists - another process crashed or is still running?", glpath),
   135  		}
   136  	}
   137  
   138  	fi, err := os.OpenFile(glpath, os.O_CREATE|os.O_EXCL, 0600) // is 0600 sane for this purpose?
   139  	if err != nil {
   140  		return nil, CouldNotCreateLockError{
   141  			Path: glpath,
   142  			Err:  fmt.Errorf("err on attempting to create global cache lock: %s", err),
   143  		}
   144  	}
   145  
   146  	ctx, cf := context.WithCancel(context.TODO())
   147  	superv := newSupervisor(ctx)
   148  	deducer := newDeductionCoordinator(superv)
   149  
   150  	sm := &SourceMgr{
   151  		cachedir:    cachedir,
   152  		lf:          fi,
   153  		suprvsr:     superv,
   154  		cancelAll:   cf,
   155  		deduceCoord: deducer,
   156  		srcCoord:    newSourceCoordinator(superv, deducer, cachedir),
   157  		qch:         make(chan struct{}),
   158  	}
   159  
   160  	return sm, nil
   161  }
   162  
   163  // UseDefaultSignalHandling sets up typical os.Interrupt signal handling for a
   164  // SourceMgr.
   165  func (sm *SourceMgr) UseDefaultSignalHandling() {
   166  	sigch := make(chan os.Signal, 1)
   167  	signal.Notify(sigch, os.Interrupt)
   168  	sm.HandleSignals(sigch)
   169  }
   170  
   171  // HandleSignals sets up logic to handle incoming signals with the goal of
   172  // shutting down the SourceMgr safely.
   173  //
   174  // Calling code must provide the signal channel, and is responsible for calling
   175  // signal.Notify() on that channel.
   176  //
   177  // Successive calls to HandleSignals() will deregister the previous handler and
   178  // set up a new one. It is not recommended that the same channel be passed
   179  // multiple times to this method.
   180  //
   181  // SetUpSigHandling() will set up a handler that is appropriate for most
   182  // use cases.
   183  func (sm *SourceMgr) HandleSignals(sigch chan os.Signal) {
   184  	sm.sigmut.Lock()
   185  	// always start by closing the qch, which will lead to any existing signal
   186  	// handler terminating, and deregistering its sigch.
   187  	if sm.qch != nil {
   188  		close(sm.qch)
   189  	}
   190  	sm.qch = make(chan struct{})
   191  
   192  	// Run a new goroutine with the input sigch and the fresh qch
   193  	go func(sch chan os.Signal, qch <-chan struct{}) {
   194  		defer signal.Stop(sch)
   195  		for {
   196  			select {
   197  			case <-sch:
   198  				// Set up a timer to uninstall the signal handler after three
   199  				// seconds, so that the user can easily force termination with a
   200  				// second ctrl-c
   201  				go func(c <-chan time.Time) {
   202  					<-c
   203  					signal.Stop(sch)
   204  				}(time.After(3 * time.Second))
   205  
   206  				if !atomic.CompareAndSwapInt32(&sm.releasing, 0, 1) {
   207  					// Something's already called Release() on this sm, so we
   208  					// don't have to do anything, as we'd just be redoing
   209  					// that work. Instead, deregister and return.
   210  					return
   211  				}
   212  
   213  				opc := sm.suprvsr.count()
   214  				if opc > 0 {
   215  					fmt.Printf("Signal received: waiting for %v ops to complete...\n", opc)
   216  				}
   217  
   218  				// Mutex interaction in a signal handler is, as a general rule,
   219  				// unsafe. I'm not clear on whether the guarantees Go provides
   220  				// around signal handling, or having passed this through a
   221  				// channel in general, obviate those concerns, but it's a lot
   222  				// easier to just rely on the mutex contained in the Once right
   223  				// now, so do that until it proves problematic or someone
   224  				// provides a clear explanation.
   225  				sm.relonce.Do(func() { sm.doRelease() })
   226  				return
   227  			case <-qch:
   228  				// quit channel triggered - deregister our sigch and return
   229  				return
   230  			}
   231  		}
   232  	}(sigch, sm.qch)
   233  	// Try to ensure handler is blocked in for-select before releasing the mutex
   234  	runtime.Gosched()
   235  
   236  	sm.sigmut.Unlock()
   237  }
   238  
   239  // StopSignalHandling deregisters any signal handler running on this SourceMgr.
   240  //
   241  // It's normally not necessary to call this directly; it will be called as
   242  // needed by Release().
   243  func (sm *SourceMgr) StopSignalHandling() {
   244  	sm.sigmut.Lock()
   245  	if sm.qch != nil {
   246  		close(sm.qch)
   247  		sm.qch = nil
   248  		runtime.Gosched()
   249  	}
   250  	sm.sigmut.Unlock()
   251  }
   252  
   253  // CouldNotCreateLockError describe failure modes in which creating a SourceMgr
   254  // did not succeed because there was an error while attempting to create the
   255  // on-disk lock file.
   256  type CouldNotCreateLockError struct {
   257  	Path string
   258  	Err  error
   259  }
   260  
   261  func (e CouldNotCreateLockError) Error() string {
   262  	return e.Err.Error()
   263  }
   264  
   265  // Release lets go of any locks held by the SourceManager. Once called, it is no
   266  // longer safe to call methods against it; all method calls will immediately
   267  // result in errors.
   268  func (sm *SourceMgr) Release() {
   269  	// Set sm.releasing before entering the Once func to guarantee that no
   270  	// _more_ method calls will stack up if/while waiting.
   271  	atomic.CompareAndSwapInt32(&sm.releasing, 0, 1)
   272  
   273  	// Whether 'releasing' is set or not, we don't want this function to return
   274  	// until after the doRelease process is done, as doing so could cause the
   275  	// process to terminate before a signal-driven doRelease() call has a chance
   276  	// to finish its cleanup.
   277  	sm.relonce.Do(func() { sm.doRelease() })
   278  }
   279  
   280  // doRelease actually releases physical resources (files on disk, etc.).
   281  //
   282  // This must be called only and exactly once. Calls to it should be wrapped in
   283  // the sm.relonce sync.Once instance.
   284  func (sm *SourceMgr) doRelease() {
   285  	// Send the signal to the supervisor to cancel all running calls
   286  	sm.cancelAll()
   287  	sm.suprvsr.wait()
   288  
   289  	// Close the file handle for the lock file and remove it from disk
   290  	sm.lf.Close()
   291  	os.Remove(filepath.Join(sm.cachedir, "sm.lock"))
   292  
   293  	// Close the qch, if non-nil, so the signal handlers run out. This will
   294  	// also deregister the sig channel, if any has been set up.
   295  	if sm.qch != nil {
   296  		close(sm.qch)
   297  	}
   298  }
   299  
   300  // GetManifestAndLock returns manifest and lock information for the provided
   301  // ProjectIdentifier, at the provided Version. The work of producing the
   302  // manifest and lock is delegated to the provided ProjectAnalyzer's
   303  // DeriveManifestAndLock() method.
   304  func (sm *SourceMgr) GetManifestAndLock(id ProjectIdentifier, v Version, an ProjectAnalyzer) (Manifest, Lock, error) {
   305  	if atomic.CompareAndSwapInt32(&sm.releasing, 1, 1) {
   306  		return nil, nil, smIsReleased{}
   307  	}
   308  
   309  	srcg, err := sm.srcCoord.getSourceGatewayFor(context.TODO(), id)
   310  	if err != nil {
   311  		return nil, nil, err
   312  	}
   313  
   314  	return srcg.getManifestAndLock(context.TODO(), id.ProjectRoot, v, an)
   315  }
   316  
   317  // ListPackages parses the tree of the Go packages at and below the ProjectRoot
   318  // of the given ProjectIdentifier, at the given version.
   319  func (sm *SourceMgr) ListPackages(id ProjectIdentifier, v Version) (pkgtree.PackageTree, error) {
   320  	if atomic.CompareAndSwapInt32(&sm.releasing, 1, 1) {
   321  		return pkgtree.PackageTree{}, smIsReleased{}
   322  	}
   323  
   324  	srcg, err := sm.srcCoord.getSourceGatewayFor(context.TODO(), id)
   325  	if err != nil {
   326  		return pkgtree.PackageTree{}, err
   327  	}
   328  
   329  	return srcg.listPackages(context.TODO(), id.ProjectRoot, v)
   330  }
   331  
   332  // ListVersions retrieves a list of the available versions for a given
   333  // repository name.
   334  //
   335  // The list is not sorted; while it may be returned in the order that the
   336  // underlying VCS reports version information, no guarantee is made. It is
   337  // expected that the caller either not care about order, or sort the result
   338  // themselves.
   339  //
   340  // This list is always retrieved from upstream on the first call. Subsequent
   341  // calls will return a cached version of the first call's results. if upstream
   342  // is not accessible (network outage, access issues, or the resource actually
   343  // went away), an error will be returned.
   344  func (sm *SourceMgr) ListVersions(id ProjectIdentifier) ([]PairedVersion, error) {
   345  	if atomic.CompareAndSwapInt32(&sm.releasing, 1, 1) {
   346  		return nil, smIsReleased{}
   347  	}
   348  
   349  	srcg, err := sm.srcCoord.getSourceGatewayFor(context.TODO(), id)
   350  	if err != nil {
   351  		// TODO(sdboyer) More-er proper-er errors
   352  		return nil, err
   353  	}
   354  
   355  	return srcg.listVersions(context.TODO())
   356  }
   357  
   358  // RevisionPresentIn indicates whether the provided Revision is present in the given
   359  // repository.
   360  func (sm *SourceMgr) RevisionPresentIn(id ProjectIdentifier, r Revision) (bool, error) {
   361  	if atomic.CompareAndSwapInt32(&sm.releasing, 1, 1) {
   362  		return false, smIsReleased{}
   363  	}
   364  
   365  	srcg, err := sm.srcCoord.getSourceGatewayFor(context.TODO(), id)
   366  	if err != nil {
   367  		// TODO(sdboyer) More-er proper-er errors
   368  		return false, err
   369  	}
   370  
   371  	return srcg.revisionPresentIn(context.TODO(), r)
   372  }
   373  
   374  // SourceExists checks if a repository exists, either upstream or in the cache,
   375  // for the provided ProjectIdentifier.
   376  func (sm *SourceMgr) SourceExists(id ProjectIdentifier) (bool, error) {
   377  	if atomic.CompareAndSwapInt32(&sm.releasing, 1, 1) {
   378  		return false, smIsReleased{}
   379  	}
   380  
   381  	srcg, err := sm.srcCoord.getSourceGatewayFor(context.TODO(), id)
   382  	if err != nil {
   383  		return false, err
   384  	}
   385  
   386  	ctx := context.TODO()
   387  	return srcg.existsInCache(ctx) || srcg.existsUpstream(ctx), nil
   388  }
   389  
   390  // SyncSourceFor will ensure that all local caches and information about a
   391  // source are up to date with any network-acccesible information.
   392  //
   393  // The primary use case for this is prefetching.
   394  func (sm *SourceMgr) SyncSourceFor(id ProjectIdentifier) error {
   395  	if atomic.CompareAndSwapInt32(&sm.releasing, 1, 1) {
   396  		return smIsReleased{}
   397  	}
   398  
   399  	srcg, err := sm.srcCoord.getSourceGatewayFor(context.TODO(), id)
   400  	if err != nil {
   401  		return err
   402  	}
   403  
   404  	return srcg.syncLocal(context.TODO())
   405  }
   406  
   407  // ExportProject writes out the tree of the provided ProjectIdentifier's
   408  // ProjectRoot, at the provided version, to the provided directory.
   409  func (sm *SourceMgr) ExportProject(id ProjectIdentifier, v Version, to string) error {
   410  	if atomic.CompareAndSwapInt32(&sm.releasing, 1, 1) {
   411  		return smIsReleased{}
   412  	}
   413  
   414  	srcg, err := sm.srcCoord.getSourceGatewayFor(context.TODO(), id)
   415  	if err != nil {
   416  		return err
   417  	}
   418  
   419  	return srcg.exportVersionTo(context.TODO(), v, to)
   420  }
   421  
   422  // DeduceProjectRoot takes an import path and deduces the corresponding
   423  // project/source root.
   424  //
   425  // Note that some import paths may require network activity to correctly
   426  // determine the root of the path, such as, but not limited to, vanity import
   427  // paths. (A special exception is written for gopkg.in to minimize network
   428  // activity, as its behavior is well-structured)
   429  func (sm *SourceMgr) DeduceProjectRoot(ip string) (ProjectRoot, error) {
   430  	if atomic.CompareAndSwapInt32(&sm.releasing, 1, 1) {
   431  		return "", smIsReleased{}
   432  	}
   433  
   434  	pd, err := sm.deduceCoord.deduceRootPath(context.TODO(), ip)
   435  	return ProjectRoot(pd.root), err
   436  }
   437  
// timeCount is the supervisor's bookkeeping record for calls that are
// currently running under a single callInfo key.
type timeCount struct {
	// count is the number of calls pending under the key; it is incremented
	// when a call starts and decremented when one finishes.
	count int
	// start is the time at which the first pending call for the key started.
	start time.Time
}
   442  
// durCount accumulates the supervisor's metrics for finished calls of a single
// callType.
type durCount struct {
	// count is incremented each time the last pending call for a callInfo key
	// of this type finishes.
	count int
	// dur is the summed time between a key's first call starting and its last
	// pending call finishing.
	dur time.Duration
}
   447  
// supervisor tracks the calls that are currently running, so that a caller can
// block in wait() until all of them have finished, and keeps simple per-type
// metrics about calls that have completed.
type supervisor struct {
	ctx        context.Context        // lifetime context; new calls are refused once it is canceled
	cancelFunc context.CancelFunc     // cancel func created together with ctx
	mu         sync.Mutex             // Guards all maps
	cond       sync.Cond              // Wraps mu so callers can wait until all calls end
	running    map[callInfo]timeCount // calls currently in flight, keyed by name and type
	ran        map[callType]durCount  // metrics for completed calls, keyed by type
}
   456  
   457  func newSupervisor(ctx context.Context) *supervisor {
   458  	ctx, cf := context.WithCancel(ctx)
   459  	supv := &supervisor{
   460  		ctx:        ctx,
   461  		cancelFunc: cf,
   462  		running:    make(map[callInfo]timeCount),
   463  		ran:        make(map[callType]durCount),
   464  	}
   465  
   466  	supv.cond = sync.Cond{L: &supv.mu}
   467  	return supv
   468  }
   469  
   470  // do executes the incoming closure using a conjoined context, and keeps
   471  // counters to ensure the sourceMgr can't finish Release()ing until after all
   472  // calls have returned.
   473  func (sup *supervisor) do(inctx context.Context, name string, typ callType, f func(context.Context) error) error {
   474  	ci := callInfo{
   475  		name: name,
   476  		typ:  typ,
   477  	}
   478  
   479  	octx, err := sup.start(ci)
   480  	if err != nil {
   481  		return err
   482  	}
   483  
   484  	cctx, cancelFunc := constext.Cons(inctx, octx)
   485  	err = f(cctx)
   486  	sup.done(ci)
   487  	cancelFunc()
   488  	return err
   489  }
   490  
   491  func (sup *supervisor) getLifetimeContext() context.Context {
   492  	return sup.ctx
   493  }
   494  
   495  func (sup *supervisor) start(ci callInfo) (context.Context, error) {
   496  	sup.mu.Lock()
   497  	defer sup.mu.Unlock()
   498  	if sup.ctx.Err() != nil {
   499  		// We've already been canceled; error out.
   500  		return nil, sup.ctx.Err()
   501  	}
   502  
   503  	if existingInfo, has := sup.running[ci]; has {
   504  		existingInfo.count++
   505  		sup.running[ci] = existingInfo
   506  	} else {
   507  		sup.running[ci] = timeCount{
   508  			count: 1,
   509  			start: time.Now(),
   510  		}
   511  	}
   512  
   513  	return sup.ctx, nil
   514  }
   515  
   516  func (sup *supervisor) count() int {
   517  	sup.mu.Lock()
   518  	defer sup.mu.Unlock()
   519  	return len(sup.running)
   520  }
   521  
   522  func (sup *supervisor) done(ci callInfo) {
   523  	sup.mu.Lock()
   524  
   525  	existingInfo, has := sup.running[ci]
   526  	if !has {
   527  		panic(fmt.Sprintf("sourceMgr: tried to complete a call that had not registered via run()"))
   528  	}
   529  
   530  	if existingInfo.count > 1 {
   531  		// If more than one is pending, don't stop the clock yet.
   532  		existingInfo.count--
   533  		sup.running[ci] = existingInfo
   534  	} else {
   535  		// Last one for this particular key; update metrics with info.
   536  		durCnt := sup.ran[ci.typ]
   537  		durCnt.count++
   538  		durCnt.dur += time.Now().Sub(existingInfo.start)
   539  		sup.ran[ci.typ] = durCnt
   540  		delete(sup.running, ci)
   541  
   542  		if len(sup.running) == 0 {
   543  			// This is the only place where we signal the cond, as it's the only
   544  			// time that the number of running calls could become zero.
   545  			sup.cond.Signal()
   546  		}
   547  	}
   548  	sup.mu.Unlock()
   549  }
   550  
   551  // wait until all active calls have terminated.
   552  //
   553  // Assumes something else has already canceled the supervisor via its context.
   554  func (sup *supervisor) wait() {
   555  	sup.cond.L.Lock()
   556  	for len(sup.running) > 0 {
   557  		sup.cond.Wait()
   558  	}
   559  	sup.cond.L.Unlock()
   560  }
   561  
// callType identifies the category of a supervised call. It is half of the
// key under which running calls are tracked, and the key under which metrics
// for completed calls are accumulated.
type callType uint

// The known call categories.
// NOTE(review): the call sites that use these values are not in this file; the
// meaning of each is presumably what its name suggests - confirm at the
// callers.
const (
	ctHTTPMetadata callType = iota
	ctListVersions
	ctGetManifestAndLock
	ctListPackages
	ctSourcePing
	ctSourceInit
	ctSourceFetch
	ctCheckoutVersion
	ctExportTree
)
   575  
// callInfo provides metadata about an ongoing call. It is used as the map key
// under which the supervisor tracks running calls, so calls that share both
// name and typ are counted together.
type callInfo struct {
	name string   // caller-supplied name for the call
	typ  callType // category of the call
}