github.com/pachyderm/pachyderm@v1.13.4/src/server/worker/pipeline/spout/spout.go (about)

     1  package spout
     2  
     3  import (
     4  	"golang.org/x/sync/errgroup"
     5  
     6  	"github.com/pachyderm/pachyderm/src/client/pfs"
     7  	"github.com/pachyderm/pachyderm/src/client/pps"
     8  	"github.com/pachyderm/pachyderm/src/server/worker/common"
     9  	"github.com/pachyderm/pachyderm/src/server/worker/driver"
    10  	"github.com/pachyderm/pachyderm/src/server/worker/logs"
    11  	"github.com/pachyderm/pachyderm/src/server/worker/pipeline"
    12  )
    13  
    14  // Run will run a spout pipeline until the driver is canceled.
    15  func Run(driver driver.Driver, logger logs.TaggedLogger) error {
    16  	pachClient := driver.PachClient()
    17  	pipelineInfo := driver.PipelineInfo()
    18  	logger = logger.WithJob("spout")
    19  
    20  	// Spouts typically have an open commit waiting for new data. So if the spout needs to be updated, and
    21  	// thus spoutSpawner is called, it might hang if the commit never gets closed. So to avoid this, we
    22  	// delete open commits that we see here.
    23  	// We probably only need to check the first commit, but doing 10 to be safe
    24  	pachClient.ListCommitF(pipelineInfo.Pipeline.Name, "", "", 10, false, func(c *pfs.CommitInfo) error {
    25  		if c.Finished != nil {
    26  			return nil
    27  		}
    28  		return pachClient.DeleteCommit(pipelineInfo.Pipeline.Name, c.Commit.ID)
    29  	})
    30  
    31  	// TODO: do something with stats?
    32  	_, err := driver.WithData(nil, nil, logger, func(dir string, stats *pps.ProcessStats) error {
    33  		inputs := []*common.Input{} // Spouts take no inputs
    34  		return driver.WithActiveData(inputs, dir, func() error {
    35  			eg, serviceCtx := errgroup.WithContext(pachClient.Ctx())
    36  
    37  			// While spouts do write to output commits, the output commit changes
    38  			// frequently and we do not restart the user code for each one. Therefore,
    39  			// we leave the output commit out of the user code env.
    40  			eg.Go(func() error { return pipeline.RunUserCode(driver.WithContext(serviceCtx), logger, nil, inputs) })
    41  			eg.Go(func() error { return pipeline.ReceiveSpout(serviceCtx, pachClient, pipelineInfo, logger) })
    42  			return eg.Wait()
    43  		})
    44  	})
    45  	return err
    46  }