github.com/pachyderm/pachyderm@v1.13.4/src/server/worker/pipeline/transform/transform.go (about) 1 package transform 2 3 import ( 4 "fmt" 5 "strings" 6 7 "github.com/pachyderm/pachyderm/src/client" 8 "github.com/pachyderm/pachyderm/src/client/pfs" 9 "github.com/pachyderm/pachyderm/src/client/pkg/errors" 10 "github.com/pachyderm/pachyderm/src/client/pps" 11 pfsserver "github.com/pachyderm/pachyderm/src/server/pfs" 12 "github.com/pachyderm/pachyderm/src/server/pkg/ppsconsts" 13 "github.com/pachyderm/pachyderm/src/server/pkg/ppsutil" 14 "github.com/pachyderm/pachyderm/src/server/worker/driver" 15 "github.com/pachyderm/pachyderm/src/server/worker/logs" 16 ) 17 18 func getStatsCommit(pipelineInfo *pps.PipelineInfo, commitInfo *pfs.CommitInfo) *pfs.Commit { 19 for _, commitRange := range commitInfo.Subvenance { 20 if commitRange.Lower.Repo.Name == pipelineInfo.Pipeline.Name && commitRange.Upper.Repo.Name == pipelineInfo.Pipeline.Name { 21 return commitRange.Lower 22 } 23 } 24 return nil 25 } 26 27 // forEachCommit listens for each READY output commit in the pipeline, and calls 28 // the given callback once for each such commit, synchronously. 29 func forEachCommit( 30 driver driver.Driver, 31 cb func(*pfs.CommitInfo, *pfs.Commit) error, 32 ) error { 33 pachClient := driver.PachClient() 34 pi := driver.PipelineInfo() 35 36 return pachClient.SubscribeCommitF( 37 pi.Pipeline.Name, 38 "", 39 client.NewCommitProvenance(ppsconsts.SpecRepo, pi.Pipeline.Name, pi.SpecCommit.ID), 40 "", 41 pfs.CommitState_READY, 42 func(ci *pfs.CommitInfo) error { 43 statsCommit := getStatsCommit(pi, ci) 44 // TODO: ensure ci and statsCommit are in a consistent state 45 if ci.Finished == nil { 46 // Inspect the commit and check again if it has been finished (it may have 47 // been closed since it was queued, e.g. by StopPipeline or StopJob) 48 if ci, err := pachClient.InspectCommit(ci.Commit.Repo.Name, ci.Commit.ID); err != nil { 49 return err 50 } else if ci.Finished == nil { 51 return cb(ci, statsCommit) 52 } else { 53 // Make sure the stats commit has been finished as the output commit has. 54 if statsCommit != nil { 55 if _, err := pachClient.PfsAPIClient.FinishCommit(pachClient.Ctx(), &pfs.FinishCommitRequest{ 56 Commit: statsCommit, 57 Empty: true, 58 }); err != nil && !pfsserver.IsCommitFinishedErr(err) { 59 return err 60 } 61 } 62 63 // Make sure that the job has been correctly finished as the commit(s) have. 64 ji, err := pachClient.InspectJobOutputCommit(ci.Commit.Repo.Name, ci.Commit.ID, false) 65 if err != nil { 66 // If no job was created for the commit, then we are done. 67 if strings.Contains(err.Error(), fmt.Sprintf("job with output commit %s not found", ci.Commit.ID)) { 68 return nil 69 } 70 return err 71 } 72 73 if !ppsutil.IsTerminal(ji.State) { 74 if ci.Trees == nil && ci.Tree == nil { 75 ji.State = pps.JobState_JOB_KILLED 76 ji.Reason = "output commit is finished without data, but job state has not been updated" 77 } else { 78 ji.State = pps.JobState_JOB_SUCCESS 79 } 80 81 if err := finishJob(pi, pachClient, ji, ji.State, ji.Reason, nil, nil, 0, nil, 0); err != nil { 82 return errors.Wrap(err, "could not update job with finished output commit") 83 } 84 } 85 } 86 } 87 return nil 88 }, 89 ) 90 } 91 92 // Run will run a transform pipeline until the driver is canceled. 93 func Run(driver driver.Driver, logger logs.TaggedLogger) error { 94 reg, err := newRegistry(logger, driver) 95 if err != nil { 96 return err 97 } 98 99 logger.Logf("transform spawner started") 100 101 // TODO: goroutine linearly waiting on jobs in the registry and cleaning up 102 // after them, bubbling up errors, canceling 103 104 return forEachCommit(driver, func(commitInfo *pfs.CommitInfo, statsCommit *pfs.Commit) error { 105 return reg.startJob(commitInfo, statsCommit) 106 }) 107 }