github.com/pachyderm/pachyderm@v1.13.4/src/server/worker/pipeline/transform/transform.go (about)

     1  package transform
     2  
     3  import (
     4  	"fmt"
     5  	"strings"
     6  
     7  	"github.com/pachyderm/pachyderm/src/client"
     8  	"github.com/pachyderm/pachyderm/src/client/pfs"
     9  	"github.com/pachyderm/pachyderm/src/client/pkg/errors"
    10  	"github.com/pachyderm/pachyderm/src/client/pps"
    11  	pfsserver "github.com/pachyderm/pachyderm/src/server/pfs"
    12  	"github.com/pachyderm/pachyderm/src/server/pkg/ppsconsts"
    13  	"github.com/pachyderm/pachyderm/src/server/pkg/ppsutil"
    14  	"github.com/pachyderm/pachyderm/src/server/worker/driver"
    15  	"github.com/pachyderm/pachyderm/src/server/worker/logs"
    16  )
    17  
    18  func getStatsCommit(pipelineInfo *pps.PipelineInfo, commitInfo *pfs.CommitInfo) *pfs.Commit {
    19  	for _, commitRange := range commitInfo.Subvenance {
    20  		if commitRange.Lower.Repo.Name == pipelineInfo.Pipeline.Name && commitRange.Upper.Repo.Name == pipelineInfo.Pipeline.Name {
    21  			return commitRange.Lower
    22  		}
    23  	}
    24  	return nil
    25  }
    26  
    27  // forEachCommit listens for each READY output commit in the pipeline, and calls
    28  // the given callback once for each such commit, synchronously.
    29  func forEachCommit(
    30  	driver driver.Driver,
    31  	cb func(*pfs.CommitInfo, *pfs.Commit) error,
    32  ) error {
    33  	pachClient := driver.PachClient()
    34  	pi := driver.PipelineInfo()
    35  
    36  	return pachClient.SubscribeCommitF(
    37  		pi.Pipeline.Name,
    38  		"",
    39  		client.NewCommitProvenance(ppsconsts.SpecRepo, pi.Pipeline.Name, pi.SpecCommit.ID),
    40  		"",
    41  		pfs.CommitState_READY,
    42  		func(ci *pfs.CommitInfo) error {
    43  			statsCommit := getStatsCommit(pi, ci)
    44  			// TODO: ensure ci and statsCommit are in a consistent state
    45  			if ci.Finished == nil {
    46  				// Inspect the commit and check again if it has been finished (it may have
    47  				// been closed since it was queued, e.g. by StopPipeline or StopJob)
    48  				if ci, err := pachClient.InspectCommit(ci.Commit.Repo.Name, ci.Commit.ID); err != nil {
    49  					return err
    50  				} else if ci.Finished == nil {
    51  					return cb(ci, statsCommit)
    52  				} else {
    53  					// Make sure the stats commit has been finished as the output commit has.
    54  					if statsCommit != nil {
    55  						if _, err := pachClient.PfsAPIClient.FinishCommit(pachClient.Ctx(), &pfs.FinishCommitRequest{
    56  							Commit: statsCommit,
    57  							Empty:  true,
    58  						}); err != nil && !pfsserver.IsCommitFinishedErr(err) {
    59  							return err
    60  						}
    61  					}
    62  
    63  					// Make sure that the job has been correctly finished as the commit(s) have.
    64  					ji, err := pachClient.InspectJobOutputCommit(ci.Commit.Repo.Name, ci.Commit.ID, false)
    65  					if err != nil {
    66  						// If no job was created for the commit, then we are done.
    67  						if strings.Contains(err.Error(), fmt.Sprintf("job with output commit %s not found", ci.Commit.ID)) {
    68  							return nil
    69  						}
    70  						return err
    71  					}
    72  
    73  					if !ppsutil.IsTerminal(ji.State) {
    74  						if ci.Trees == nil && ci.Tree == nil {
    75  							ji.State = pps.JobState_JOB_KILLED
    76  							ji.Reason = "output commit is finished without data, but job state has not been updated"
    77  						} else {
    78  							ji.State = pps.JobState_JOB_SUCCESS
    79  						}
    80  
    81  						if err := finishJob(pi, pachClient, ji, ji.State, ji.Reason, nil, nil, 0, nil, 0); err != nil {
    82  							return errors.Wrap(err, "could not update job with finished output commit")
    83  						}
    84  					}
    85  				}
    86  			}
    87  			return nil
    88  		},
    89  	)
    90  }
    91  
    92  // Run will run a transform pipeline until the driver is canceled.
    93  func Run(driver driver.Driver, logger logs.TaggedLogger) error {
    94  	reg, err := newRegistry(logger, driver)
    95  	if err != nil {
    96  		return err
    97  	}
    98  
    99  	logger.Logf("transform spawner started")
   100  
   101  	// TODO: goroutine linearly waiting on jobs in the registry and cleaning up
   102  	// after them, bubbling up errors, canceling
   103  
   104  	return forEachCommit(driver, func(commitInfo *pfs.CommitInfo, statsCommit *pfs.Commit) error {
   105  		return reg.startJob(commitInfo, statsCommit)
   106  	})
   107  }