package client

import (
	"crypto/sha256"
	"encoding/hex"
	"io"
	"time"

	"github.com/pachyderm/pachyderm/src/client/pfs"
	"github.com/pachyderm/pachyderm/src/client/pkg/errors"
	"github.com/pachyderm/pachyderm/src/client/pkg/grpcutil"
	"github.com/pachyderm/pachyderm/src/client/pps"
	"github.com/pachyderm/pachyderm/src/server/pkg/errutil"

	"github.com/gogo/protobuf/types"
)

const (
	// PPSEtcdPrefixEnv is the environment variable that specifies the etcd
	// prefix that PPS uses.
	PPSEtcdPrefixEnv = "PPS_ETCD_PREFIX"
	// PPSWorkerIPEnv is the environment variable that a worker can use to
	// see its own IP. The IP address is made available through the
	// Kubernetes downward API.
	PPSWorkerIPEnv = "PPS_WORKER_IP"
	// PPSPodNameEnv is the environment variable that a pod can use to
	// see its own name. The pod name is made available through the
	// Kubernetes downward API.
	PPSPodNameEnv = "PPS_POD_NAME"
	// PPSPipelineNameEnv is the env var that sets the name of the pipeline
	// that the workers are running.
	PPSPipelineNameEnv = "PPS_PIPELINE_NAME"
	// PPSJobIDEnv is the env var that sets the ID of the job that the
	// workers are running (if the workers belong to an orphan job, rather than a
	// pipeline).
	PPSJobIDEnv = "PPS_JOB_ID"
	// PPSSpecCommitEnv is the env var that carries the pipeline's spec
	// commit to the workers.
	// NOTE(review): the original comment here ("the namespace in which
	// pachyderm is deployed") looks like a copy-paste error — confirm
	// against the worker code that reads this variable.
	PPSSpecCommitEnv = "PPS_SPEC_COMMIT"
	// PPSInputPrefix is the prefix of the path where datums are downloaded
	// to. A datum of an input named `XXX` is downloaded to `/pfs/XXX/`.
	PPSInputPrefix = "/pfs"
	// PPSScratchSpace is where pps workers store data while it's waiting to be
	// processed.
	PPSScratchSpace = ".scratch"
	// PPSWorkerPortEnv is environment variable name for the port that workers
	// use for their gRPC server
	PPSWorkerPortEnv = "PPS_WORKER_GRPC_PORT"
	// PPSWorkerVolume is the name of the volume in which workers store
	// data.
	PPSWorkerVolume = "pachyderm-worker"
	// PPSWorkerUserContainerName is the name of the container that runs
	// the user code to process data.
	PPSWorkerUserContainerName = "user"
	// PPSWorkerSidecarContainerName is the name of the sidecar container
	// that runs alongside of each worker container.
	PPSWorkerSidecarContainerName = "storage"
	// GCGenerationKey is the etcd key that stores a counter that the
	// GC utility increments when it runs, so as to invalidate all cache.
	GCGenerationKey = "gc-generation"
	// JobIDEnv is an env var that is added to the environment of user pipeline
	// code and indicates the id of the job currently being run.
	JobIDEnv = "PACH_JOB_ID"
	// OutputCommitIDEnv is an env var that is added to the environment of user
	// pipeline code and indicates the id of the output commit.
	OutputCommitIDEnv = "PACH_OUTPUT_COMMIT_ID"
	// PeerPortEnv is the env var that sets a custom peer port
	PeerPortEnv = "PEER_PORT"

	// ReprocessSpecUntilSuccess is used in the pipeline.ReprocessSpec field. It
	// is the default behavior of reattempting failed datums in each job.
	ReprocessSpecUntilSuccess = "until_success"
	// ReprocessSpecEveryJob is used in the pipeline.ReprocessSpec field. With
	// this, a pipeline will reprocess every datum in every job, regardless of if
	// it succeeded or failed previously.
	ReprocessSpecEveryJob = "every_job"
)

// NewJob creates a pps.Job with the given ID.
func NewJob(jobID string) *pps.Job {
	return &pps.Job{ID: jobID}
}

// DatumTagPrefix hashes a pipeline salt to a string of a fixed size (4 hex
// characters) for use as the prefix for datum output trees. This prefix
// allows us to do garbage collection correctly.
func DatumTagPrefix(salt string) string {
	// We need to hash the salt because UUIDs are not necessarily
	// random in every bit.
	h := sha256.New()
	h.Write([]byte(salt))
	return hex.EncodeToString(h.Sum(nil))[:4]
}

// NewPFSInput returns a new PFS input. It only includes required options.
func NewPFSInput(repo string, glob string) *pps.Input {
	return &pps.Input{
		Pfs: &pps.PFSInput{
			Repo: repo,
			Glob: glob,
		},
	}
}

// NewPFSInputOpts returns a new PFS input. It includes all options.
func NewPFSInputOpts(name string, repo string, branch string, glob string, joinOn string, groupBy string, outerJoin bool, lazy bool, trigger *pfs.Trigger) *pps.Input {
	return &pps.Input{
		Pfs: &pps.PFSInput{
			Name:      name,
			Repo:      repo,
			Branch:    branch,
			Glob:      glob,
			JoinOn:    joinOn,
			OuterJoin: outerJoin,
			GroupBy:   groupBy,
			Lazy:      lazy,
			Trigger:   trigger,
		},
	}
}

// NewS3PFSInput returns a new PFS input with 'S3' set.
func NewS3PFSInput(name string, repo string, branch string) *pps.Input {
	return &pps.Input{
		Pfs: &pps.PFSInput{
			Name:   name,
			Repo:   repo,
			Branch: branch,
			S3:     true,
		},
	}
}

// NewCrossInput returns an input which is the cross product of other inputs.
// That means that all combination of datums will be seen by the job /
// pipeline.
func NewCrossInput(input ...*pps.Input) *pps.Input {
	return &pps.Input{
		Cross: input,
	}
}

// NewJoinInput returns an input which is the join of other inputs.
// That means that all combination of datums which match on `joinOn` will be seen by the job /
// pipeline.
func NewJoinInput(input ...*pps.Input) *pps.Input {
	return &pps.Input{
		Join: input,
	}
}

// NewUnionInput returns an input which is the union of other inputs.
That 152 // means that all datums from any of the inputs will be seen individually by 153 // the job / pipeline. 154 func NewUnionInput(input ...*pps.Input) *pps.Input { 155 return &pps.Input{ 156 Union: input, 157 } 158 } 159 160 // NewGroupInput returns an input which groups the inputs by the GroupBy pattern. 161 // That means that it will return a datum for each group of input datums matching 162 // a particular GroupBy pattern 163 func NewGroupInput(input ...*pps.Input) *pps.Input { 164 return &pps.Input{ 165 Group: input, 166 } 167 } 168 169 // NewCronInput returns an input which will trigger based on a timed schedule. 170 // It uses cron syntax to specify the schedule. The input will be exposed to 171 // jobs as `/pfs/<name>/<timestamp>`. The timestamp uses the RFC 3339 format, 172 // e.g. `2006-01-02T15:04:05Z07:00`. It only takes required options. 173 func NewCronInput(name string, spec string) *pps.Input { 174 return &pps.Input{ 175 Cron: &pps.CronInput{ 176 Name: name, 177 Spec: spec, 178 }, 179 } 180 } 181 182 // NewCronInputOpts returns an input which will trigger based on a timed schedule. 183 // It uses cron syntax to specify the schedule. The input will be exposed to 184 // jobs as `/pfs/<name>/<timestamp>`. The timestamp uses the RFC 3339 format, 185 // e.g. `2006-01-02T15:04:05Z07:00`. It includes all the options. 186 func NewCronInputOpts(name string, repo string, spec string, overwrite bool) *pps.Input { 187 return &pps.Input{ 188 Cron: &pps.CronInput{ 189 Name: name, 190 Repo: repo, 191 Spec: spec, 192 Overwrite: overwrite, 193 }, 194 } 195 } 196 197 // NewJobInput creates a pps.JobInput. 198 func NewJobInput(repoName string, commitID string, glob string) *pps.JobInput { 199 return &pps.JobInput{ 200 Commit: NewCommit(repoName, commitID), 201 Glob: glob, 202 } 203 } 204 205 // NewPipeline creates a pps.Pipeline. 
206 func NewPipeline(pipelineName string) *pps.Pipeline { 207 return &pps.Pipeline{Name: pipelineName} 208 } 209 210 // CreateJob creates and runs a job in PPS. 211 // This function is mostly useful internally, users should generally run work 212 // by creating pipelines as well. 213 func (c APIClient) CreateJob(pipeline string, outputCommit, statsCommit *pfs.Commit) (*pps.Job, error) { 214 job, err := c.PpsAPIClient.CreateJob( 215 c.Ctx(), 216 &pps.CreateJobRequest{ 217 Pipeline: NewPipeline(pipeline), 218 OutputCommit: outputCommit, 219 StatsCommit: statsCommit, 220 }, 221 ) 222 return job, grpcutil.ScrubGRPC(err) 223 } 224 225 // InspectJob returns info about a specific job. 226 // blockState will cause the call to block until the job reaches a terminal state (failure or success). 227 // full indicates that the full job info should be returned. 228 func (c APIClient) InspectJob(jobID string, blockState bool, full ...bool) (*pps.JobInfo, error) { 229 req := &pps.InspectJobRequest{ 230 Job: NewJob(jobID), 231 BlockState: blockState, 232 } 233 if len(full) > 0 { 234 req.Full = full[0] 235 } 236 jobInfo, err := c.PpsAPIClient.InspectJob(c.Ctx(), req) 237 return jobInfo, grpcutil.ScrubGRPC(err) 238 } 239 240 // InspectJobOutputCommit returns info about a job that created a commit. 241 // blockState will cause the call to block until the job reaches a terminal state (failure or success). 242 func (c APIClient) InspectJobOutputCommit(repoName, commitID string, blockState bool) (*pps.JobInfo, error) { 243 jobInfo, err := c.PpsAPIClient.InspectJob( 244 c.Ctx(), 245 &pps.InspectJobRequest{ 246 OutputCommit: NewCommit(repoName, commitID), 247 BlockState: blockState, 248 }) 249 return jobInfo, grpcutil.ScrubGRPC(err) 250 } 251 252 // ListJob returns info about all jobs. 
// If pipelineName is non empty then only jobs that were started by the named pipeline will be returned
// If inputCommit is non-nil then only jobs which took the specific commits as inputs will be returned.
// The order of the inputCommits doesn't matter.
// If outputCommit is non-nil then only the job which created that commit as output will be returned.
// 'history' controls whether jobs from historical versions of pipelines are returned, it has the following semantics:
//  0: Return jobs from the current version of the pipeline or pipelines.
//  1: Return the above and jobs from the next most recent version
//  2: etc.
// -1: Return jobs from all historical versions.
// 'includePipelineInfo' controls whether the JobInfo passed to 'f' includes
// details from the pipeline spec (e.g. the transform). Leaving this 'false'
// can improve performance.
func (c APIClient) ListJob(pipelineName string, inputCommit []*pfs.Commit, outputCommit *pfs.Commit, history int64, includePipelineInfo bool) ([]*pps.JobInfo, error) {
	var result []*pps.JobInfo
	if err := c.ListJobF(pipelineName, inputCommit, outputCommit, history,
		includePipelineInfo, func(ji *pps.JobInfo) error {
			result = append(result, ji)
			return nil
		}); err != nil {
		return nil, err
	}
	return result, nil
}

// ListJobF is a previous version of ListJobFilterF, returning info about all jobs
// and calling f on each JobInfo. It is equivalent to ListJobFilterF with an
// empty jq filter.
func (c APIClient) ListJobF(pipelineName string, inputCommit []*pfs.Commit,
	outputCommit *pfs.Commit, history int64, includePipelineInfo bool,
	f func(*pps.JobInfo) error) error {
	return c.ListJobFilterF(pipelineName, inputCommit, outputCommit, history, includePipelineInfo, "", f)
}

// ListJobFilterF returns info about all jobs, calling f with each JobInfo.
// If f returns an error, iteration of jobs will stop and ListJobFilterF will
// return that error, unless the error is errutil.ErrBreak in which case it
// will return nil.
// If pipelineName is non empty then only jobs that were started by the named pipeline will be returned
// If inputCommit is non-nil then only jobs which took the specific commits as inputs will be returned.
// The order of the inputCommits doesn't matter.
// If outputCommit is non-nil then only the job which created that commit as output will be returned.
// 'history' controls whether jobs from historical versions of pipelines are returned, it has the following semantics:
//  0: Return jobs from the current version of the pipeline or pipelines.
//  1: Return the above and jobs from the next most recent version
//  2: etc.
// -1: Return jobs from all historical versions.
// 'includePipelineInfo' controls whether the JobInfo passed to 'f' includes
// details from the pipeline spec--setting this to 'false' can improve
// performance.
301 func (c APIClient) ListJobFilterF(pipelineName string, inputCommit []*pfs.Commit, 302 outputCommit *pfs.Commit, history int64, includePipelineInfo bool, jqFilter string, 303 f func(*pps.JobInfo) error) error { 304 var pipeline *pps.Pipeline 305 if pipelineName != "" { 306 pipeline = NewPipeline(pipelineName) 307 } 308 client, err := c.PpsAPIClient.ListJobStream( 309 c.Ctx(), 310 &pps.ListJobRequest{ 311 Pipeline: pipeline, 312 InputCommit: inputCommit, 313 OutputCommit: outputCommit, 314 History: history, 315 Full: includePipelineInfo, 316 JqFilter: jqFilter, 317 }) 318 if err != nil { 319 return grpcutil.ScrubGRPC(err) 320 } 321 for { 322 ji, err := client.Recv() 323 if errors.Is(err, io.EOF) { 324 return nil 325 } else if err != nil { 326 return grpcutil.ScrubGRPC(err) 327 } 328 if err := f(ji); err != nil { 329 if errors.Is(err, errutil.ErrBreak) { 330 return nil 331 } 332 return err 333 } 334 } 335 } 336 337 // FlushJob calls f with all the jobs which were triggered by commits. 338 // If toPipelines is non-nil then only the jobs between commits and those 339 // pipelines in the DAG will be returned. 340 func (c APIClient) FlushJob(commits []*pfs.Commit, toPipelines []string, f func(*pps.JobInfo) error) error { 341 req := &pps.FlushJobRequest{ 342 Commits: commits, 343 } 344 for _, pipeline := range toPipelines { 345 req.ToPipelines = append(req.ToPipelines, NewPipeline(pipeline)) 346 } 347 client, err := c.PpsAPIClient.FlushJob(c.Ctx(), req) 348 if err != nil { 349 return grpcutil.ScrubGRPC(err) 350 } 351 for { 352 jobInfo, err := client.Recv() 353 if err != nil { 354 if errors.Is(err, io.EOF) { 355 return nil 356 } 357 return grpcutil.ScrubGRPC(err) 358 } 359 if err := f(jobInfo); err != nil { 360 return err 361 } 362 } 363 } 364 365 // FlushJobAll returns all the jobs which were triggered by commits. 366 // If toPipelines is non-nil then only the jobs between commits and those 367 // pipelines in the DAG will be returned. 
368 func (c APIClient) FlushJobAll(commits []*pfs.Commit, toPipelines []string) ([]*pps.JobInfo, error) { 369 var result []*pps.JobInfo 370 if err := c.FlushJob(commits, toPipelines, func(ji *pps.JobInfo) error { 371 result = append(result, ji) 372 return nil 373 }); err != nil { 374 return nil, err 375 } 376 return result, nil 377 } 378 379 // DeleteJob deletes a job. 380 func (c APIClient) DeleteJob(jobID string) error { 381 _, err := c.PpsAPIClient.DeleteJob( 382 c.Ctx(), 383 &pps.DeleteJobRequest{ 384 Job: NewJob(jobID), 385 }, 386 ) 387 return grpcutil.ScrubGRPC(err) 388 } 389 390 // StopJob stops a job. 391 func (c APIClient) StopJob(jobID string) error { 392 _, err := c.PpsAPIClient.StopJob( 393 c.Ctx(), 394 &pps.StopJobRequest{ 395 Job: NewJob(jobID), 396 }, 397 ) 398 return grpcutil.ScrubGRPC(err) 399 } 400 401 // RestartDatum restarts a datum that's being processed as part of a job. 402 // datumFilter is a slice of strings which are matched against either the Path 403 // or Hash of the datum, the order of the strings in datumFilter is irrelevant. 404 func (c APIClient) RestartDatum(jobID string, datumFilter []string) error { 405 _, err := c.PpsAPIClient.RestartDatum( 406 c.Ctx(), 407 &pps.RestartDatumRequest{ 408 Job: NewJob(jobID), 409 DataFilters: datumFilter, 410 }, 411 ) 412 return grpcutil.ScrubGRPC(err) 413 } 414 415 // ListDatum returns info about datums in a Job 416 func (c APIClient) ListDatum(jobID string, pageSize, page int64) (*pps.ListDatumResponse, error) { 417 return c.listDatum(NewJob(jobID), nil, pageSize, page) 418 } 419 420 // ListDatumInput returns info about datums for a pipeline with input. The 421 // pipeline doesn't need to exist. 
422 func (c APIClient) ListDatumInput(input *pps.Input, pageSize, page int64) (*pps.ListDatumResponse, error) { 423 return c.listDatum(nil, input, pageSize, page) 424 } 425 426 func (c APIClient) listDatum(job *pps.Job, input *pps.Input, pageSize, page int64) (*pps.ListDatumResponse, error) { 427 client, err := c.PpsAPIClient.ListDatumStream( 428 c.Ctx(), 429 &pps.ListDatumRequest{ 430 Input: input, 431 PageSize: pageSize, 432 Page: page, 433 Job: job, 434 }, 435 ) 436 if err != nil { 437 return nil, grpcutil.ScrubGRPC(err) 438 } 439 resp := &pps.ListDatumResponse{} 440 first := true 441 for { 442 r, err := client.Recv() 443 if errors.Is(err, io.EOF) { 444 break 445 } else if err != nil { 446 return nil, grpcutil.ScrubGRPC(err) 447 } 448 if first { 449 resp.TotalPages = r.TotalPages 450 resp.Page = r.Page 451 first = false 452 } 453 resp.DatumInfos = append(resp.DatumInfos, r.DatumInfo) 454 } 455 return resp, nil 456 } 457 458 // ListDatumOption represents an optional modification to a ListDatum request 459 type ListDatumOption func(*pps.ListDatumRequest) error 460 461 // WithStatusOnly causes a ListDatum request to only retrieve status information for datums, 462 // which can improve performance 463 func WithStatusOnly() ListDatumOption { 464 return func(req *pps.ListDatumRequest) error { 465 req.StatusOnly = true 466 return nil 467 } 468 } 469 470 // ListDatumF returns info about datums in a Job, calling f with each datum info. 471 func (c APIClient) ListDatumF(jobID string, pageSize int64, page int64, f func(di *pps.DatumInfo) error, options ...ListDatumOption) error { 472 return c.listDatumF(NewJob(jobID), nil, pageSize, page, f, options...) 473 } 474 475 // ListDatumInputF returns info about datums for a pipeline with input, calling 476 // f with each datum info. The pipeline doesn't need to exist. 
477 func (c APIClient) ListDatumInputF(input *pps.Input, pageSize, page int64, f func(di *pps.DatumInfo) error) error { 478 return c.listDatumF(nil, input, pageSize, page, f) 479 } 480 481 func (c APIClient) listDatumF(job *pps.Job, input *pps.Input, pageSize, page int64, f func(di *pps.DatumInfo) error, options ...ListDatumOption) error { 482 req := &pps.ListDatumRequest{ 483 Input: input, 484 PageSize: pageSize, 485 Page: page, 486 Job: job, 487 } 488 for _, opt := range options { 489 if err := opt(req); err != nil { 490 return err 491 } 492 } 493 client, err := c.PpsAPIClient.ListDatumStream(c.Ctx(), req) 494 if err != nil { 495 return grpcutil.ScrubGRPC(err) 496 } 497 for { 498 resp, err := client.Recv() 499 if errors.Is(err, io.EOF) { 500 return nil 501 } else if err != nil { 502 return grpcutil.ScrubGRPC(err) 503 } 504 if err := f(resp.DatumInfo); err != nil { 505 if errors.Is(err, errutil.ErrBreak) { 506 return nil 507 } 508 return err 509 } 510 } 511 } 512 513 // InspectDatum returns info about a single datum 514 func (c APIClient) InspectDatum(jobID string, datumID string) (*pps.DatumInfo, error) { 515 datumInfo, err := c.PpsAPIClient.InspectDatum( 516 c.Ctx(), 517 &pps.InspectDatumRequest{ 518 Datum: &pps.Datum{ 519 ID: datumID, 520 Job: NewJob(jobID), 521 }, 522 }, 523 ) 524 if err != nil { 525 return nil, grpcutil.ScrubGRPC(err) 526 } 527 return datumInfo, nil 528 } 529 530 // LogsIter iterates through log messages returned from pps.GetLogs. Logs can 531 // be fetched with 'Next()'. The log message received can be examined with 532 // 'Message()', and any errors can be examined with 'Err()'. 
533 type LogsIter struct { 534 logsClient pps.API_GetLogsClient 535 msg *pps.LogMessage 536 err error 537 } 538 539 // Next retrieves the next relevant log message from pachd 540 func (l *LogsIter) Next() bool { 541 if l.err != nil { 542 l.msg = nil 543 return false 544 } 545 l.msg, l.err = l.logsClient.Recv() 546 return l.err == nil 547 } 548 549 // Message returns the most recently retrieve log message (as an annotated log 550 // line, in the form of a pps.LogMessage) 551 func (l *LogsIter) Message() *pps.LogMessage { 552 return l.msg 553 } 554 555 // Err retrieves any errors encountered in the course of calling 'Next()'. 556 func (l *LogsIter) Err() error { 557 if errors.Is(l.err, io.EOF) { 558 return nil 559 } 560 return grpcutil.ScrubGRPC(l.err) 561 } 562 563 // GetLogs gets logs from a job (logs includes stdout and stderr). 'pipelineName', 564 // 'jobID', 'data', and 'datumID', are all filters. To forego any filter, 565 // simply pass an empty value, though one of 'pipelineName' and 'jobID' 566 // must be set. Responses are written to 'messages' 567 func (c APIClient) GetLogs( 568 pipelineName string, 569 jobID string, 570 data []string, 571 datumID string, 572 master bool, 573 follow bool, 574 since time.Duration, 575 ) *LogsIter { 576 return c.getLogs(pipelineName, jobID, data, datumID, master, follow, since, false) 577 } 578 579 // GetLogsLoki gets logs from a job (logs includes stdout and stderr). 'pipelineName', 580 // 'jobID', 'data', and 'datumID', are all filters. To forego any filter, 581 // simply pass an empty value, though one of 'pipelineName' and 'jobID' 582 // must be set. 
Responses are written to 'messages' 583 func (c APIClient) GetLogsLoki( 584 pipelineName string, 585 jobID string, 586 data []string, 587 datumID string, 588 master bool, 589 follow bool, 590 since time.Duration, 591 ) *LogsIter { 592 return c.getLogs(pipelineName, jobID, data, datumID, master, follow, since, true) 593 } 594 595 func (c APIClient) getLogs( 596 pipelineName string, 597 jobID string, 598 data []string, 599 datumID string, 600 master bool, 601 follow bool, 602 since time.Duration, 603 useLoki bool, 604 ) *LogsIter { 605 request := pps.GetLogsRequest{ 606 Master: master, 607 Follow: follow, 608 UseLokiBackend: useLoki, 609 Since: types.DurationProto(since), 610 } 611 if pipelineName != "" { 612 request.Pipeline = NewPipeline(pipelineName) 613 } 614 if jobID != "" { 615 request.Job = NewJob(jobID) 616 } 617 request.DataFilters = data 618 if datumID != "" { 619 request.Datum = &pps.Datum{ 620 Job: NewJob(jobID), 621 ID: datumID, 622 } 623 } 624 resp := &LogsIter{} 625 resp.logsClient, resp.err = c.PpsAPIClient.GetLogs(c.Ctx(), &request) 626 resp.err = grpcutil.ScrubGRPC(resp.err) 627 return resp 628 } 629 630 // CreatePipeline creates a new pipeline, pipelines are the main computation 631 // object in PPS they create a flow of data from a set of input Repos to an 632 // output Repo (which has the same name as the pipeline). Whenever new data is 633 // committed to one of the input repos the pipelines will create jobs to bring 634 // the output Repo up to data. 635 // image is the Docker image to run the jobs in. 636 // cmd is the command passed to the Docker run invocation. 637 // NOTE as with Docker cmd is not run inside a shell that means that things 638 // like wildcard globbing (*), pipes (|) and file redirects (> and >>) will not 639 // work. To get that behavior you should have your command be a shell of your 640 // choice and pass a shell script to stdin. 641 // stdin is a slice of lines that are sent to your command on stdin. 
Lines need 642 // not end in newline characters. 643 // parallelism is how many copies of your container should run in parallel. You 644 // may pass 0 for parallelism in which case PPS will set the parallelism based 645 // on available resources. 646 // input specifies a set of Repos that will be visible to the jobs during runtime. 647 // commits to these repos will cause the pipeline to create new jobs to process them. 648 // update indicates that you want to update an existing pipeline 649 func (c APIClient) CreatePipeline( 650 name string, 651 image string, 652 cmd []string, 653 stdin []string, 654 parallelismSpec *pps.ParallelismSpec, 655 input *pps.Input, 656 outputBranch string, 657 update bool, 658 ) error { 659 _, err := c.PpsAPIClient.CreatePipeline( 660 c.Ctx(), 661 &pps.CreatePipelineRequest{ 662 Pipeline: NewPipeline(name), 663 Transform: &pps.Transform{ 664 Image: image, 665 Cmd: cmd, 666 Stdin: stdin, 667 }, 668 ParallelismSpec: parallelismSpec, 669 Input: input, 670 OutputBranch: outputBranch, 671 Update: update, 672 }, 673 ) 674 return grpcutil.ScrubGRPC(err) 675 } 676 677 // InspectPipeline returns info about a specific pipeline. 678 func (c APIClient) InspectPipeline(pipelineName string) (*pps.PipelineInfo, error) { 679 pipelineInfo, err := c.PpsAPIClient.InspectPipeline( 680 c.Ctx(), 681 &pps.InspectPipelineRequest{ 682 Pipeline: NewPipeline(pipelineName), 683 }, 684 ) 685 return pipelineInfo, grpcutil.ScrubGRPC(err) 686 } 687 688 // ListPipeline returns info about all pipelines. 689 func (c APIClient) ListPipeline() ([]*pps.PipelineInfo, error) { 690 pipelineInfos, err := c.PpsAPIClient.ListPipeline( 691 c.Ctx(), 692 &pps.ListPipelineRequest{}, 693 ) 694 if err != nil { 695 return nil, grpcutil.ScrubGRPC(err) 696 } 697 return pipelineInfos.PipelineInfo, nil 698 } 699 700 // ListPipelineHistory returns historical information about pipelines. 
701 // `pipeline` specifies which pipeline to return history about, if it's equal 702 // to "" then ListPipelineHistory returns historical information about all 703 // pipelines. 704 // `history` specifies how many historical revisions to return: 705 // 0: Return the current version of the pipeline or pipelines. 706 // 1: Return the above and the next most recent version 707 // 2: etc. 708 //-1: Return all historical versions. 709 func (c APIClient) ListPipelineHistory(pipeline string, history int64) ([]*pps.PipelineInfo, error) { 710 var _pipeline *pps.Pipeline 711 if pipeline != "" { 712 _pipeline = NewPipeline(pipeline) 713 } 714 pipelineInfos, err := c.PpsAPIClient.ListPipeline( 715 c.Ctx(), 716 &pps.ListPipelineRequest{ 717 Pipeline: _pipeline, 718 History: history, 719 }, 720 ) 721 if err != nil { 722 return nil, grpcutil.ScrubGRPC(err) 723 } 724 return pipelineInfos.PipelineInfo, nil 725 } 726 727 // DeletePipeline deletes a pipeline along with its output Repo. 728 func (c APIClient) DeletePipeline(name string, force bool, splitTransaction ...bool) error { 729 req := &pps.DeletePipelineRequest{ 730 Pipeline: NewPipeline(name), 731 Force: force, 732 } 733 if len(splitTransaction) > 0 { 734 req.SplitTransaction = splitTransaction[0] 735 } 736 _, err := c.PpsAPIClient.DeletePipeline( 737 c.Ctx(), 738 req, 739 ) 740 return grpcutil.ScrubGRPC(err) 741 } 742 743 // StartPipeline restarts a stopped pipeline. 744 func (c APIClient) StartPipeline(name string) error { 745 _, err := c.PpsAPIClient.StartPipeline( 746 c.Ctx(), 747 &pps.StartPipelineRequest{ 748 Pipeline: NewPipeline(name), 749 }, 750 ) 751 return grpcutil.ScrubGRPC(err) 752 } 753 754 // StopPipeline prevents a pipeline from processing things, it can be restarted 755 // with StartPipeline. 
756 func (c APIClient) StopPipeline(name string) error { 757 _, err := c.PpsAPIClient.StopPipeline( 758 c.Ctx(), 759 &pps.StopPipelineRequest{ 760 Pipeline: NewPipeline(name), 761 }, 762 ) 763 return grpcutil.ScrubGRPC(err) 764 } 765 766 // RunPipeline runs a pipeline. It can be passed a list of commit provenance. 767 // This will trigger a new job provenant on those commits, effectively running the pipeline on the data in those commits. 768 func (c APIClient) RunPipeline(name string, provenance []*pfs.CommitProvenance, jobID string) error { 769 _, err := c.PpsAPIClient.RunPipeline( 770 c.Ctx(), 771 &pps.RunPipelineRequest{ 772 Pipeline: NewPipeline(name), 773 Provenance: provenance, 774 JobID: jobID, 775 }, 776 ) 777 return grpcutil.ScrubGRPC(err) 778 } 779 780 // RunCron runs a pipeline. It can be passed a list of commit provenance. 781 // This will trigger a new job provenant on those commits, effectively running the pipeline on the data in those commits. 782 func (c APIClient) RunCron(name string) error { 783 _, err := c.PpsAPIClient.RunCron( 784 c.Ctx(), 785 &pps.RunCronRequest{ 786 Pipeline: NewPipeline(name), 787 }, 788 ) 789 return grpcutil.ScrubGRPC(err) 790 } 791 792 // CreateSecret creates a secret on the cluster. 793 func (c APIClient) CreateSecret(file []byte) error { 794 _, err := c.PpsAPIClient.CreateSecret( 795 c.Ctx(), 796 &pps.CreateSecretRequest{ 797 File: file, 798 }, 799 ) 800 return grpcutil.ScrubGRPC(err) 801 } 802 803 // DeleteSecret deletes a secret from the cluster. 804 func (c APIClient) DeleteSecret(secret string) error { 805 _, err := c.PpsAPIClient.DeleteSecret( 806 c.Ctx(), 807 &pps.DeleteSecretRequest{ 808 Secret: &pps.Secret{Name: secret}, 809 }, 810 ) 811 return grpcutil.ScrubGRPC(err) 812 } 813 814 // InspectSecret returns info about a specific secret. 
815 func (c APIClient) InspectSecret(secret string) (*pps.SecretInfo, error) { 816 secretInfo, err := c.PpsAPIClient.InspectSecret( 817 c.Ctx(), 818 &pps.InspectSecretRequest{ 819 Secret: &pps.Secret{Name: secret}, 820 }, 821 ) 822 return secretInfo, grpcutil.ScrubGRPC(err) 823 } 824 825 // ListSecret returns info about all Pachyderm secrets. 826 func (c APIClient) ListSecret() ([]*pps.SecretInfo, error) { 827 secretInfos, err := c.PpsAPIClient.ListSecret( 828 c.Ctx(), 829 &types.Empty{}, 830 ) 831 if err != nil { 832 return nil, grpcutil.ScrubGRPC(err) 833 } 834 return secretInfos.SecretInfo, nil 835 } 836 837 // CreatePipelineService creates a new pipeline service. 838 func (c APIClient) CreatePipelineService( 839 name string, 840 image string, 841 cmd []string, 842 stdin []string, 843 parallelismSpec *pps.ParallelismSpec, 844 input *pps.Input, 845 update bool, 846 internalPort int32, 847 externalPort int32, 848 annotations map[string]string, 849 ) error { 850 _, err := c.PpsAPIClient.CreatePipeline( 851 c.Ctx(), 852 &pps.CreatePipelineRequest{ 853 Pipeline: NewPipeline(name), 854 Metadata: &pps.Metadata{ 855 Annotations: annotations, 856 }, 857 Transform: &pps.Transform{ 858 Image: image, 859 Cmd: cmd, 860 Stdin: stdin, 861 }, 862 ParallelismSpec: parallelismSpec, 863 Input: input, 864 Update: update, 865 Service: &pps.Service{ 866 InternalPort: internalPort, 867 ExternalPort: externalPort, 868 }, 869 }, 870 ) 871 return grpcutil.ScrubGRPC(err) 872 } 873 874 // GarbageCollect garbage collects unused data. Currently GC needs to be run 875 // while no data is being added or removed (which, among other things, implies 876 // that there shouldn't be jobs actively running). Pfs Garbage collection uses 877 // bloom filters to keep track of live objects because it can store more 878 // objects than can be indexed in memory. 
This means that there is a chance for 879 // unreferenced objects to not be GCed, this chance increases as the number of 880 // objects in the system increases. You can tradeoff using more memory to get a 881 // lower chance of collisions, the default value is 10 MB and collisions should 882 // be unlikely until you have 10 million objects. 883 func (c APIClient) GarbageCollect(memoryBytes int64) error { 884 _, err := c.PpsAPIClient.GarbageCollect( 885 c.Ctx(), 886 &pps.GarbageCollectRequest{MemoryBytes: memoryBytes}, 887 ) 888 return grpcutil.ScrubGRPC(err) 889 } 890 891 // GetDatumTotalTime sums the timing stats from a DatumInfo 892 func GetDatumTotalTime(s *pps.ProcessStats) time.Duration { 893 totalDuration := time.Duration(0) 894 duration, _ := types.DurationFromProto(s.DownloadTime) 895 totalDuration += duration 896 duration, _ = types.DurationFromProto(s.ProcessTime) 897 totalDuration += duration 898 duration, _ = types.DurationFromProto(s.UploadTime) 899 totalDuration += duration 900 return totalDuration 901 }