github.com/pachyderm/pachyderm@v1.13.4/src/client/admin/v1_7/pps/pps.proto (about)

     1  syntax = "proto3";
     2  
     3  package pps_1_7;
     4  option go_package = "github.com/pachyderm/pachyderm/src/client/admin/v1_7/pps";
     5  
     6  import "google/protobuf/empty.proto";
     7  import "google/protobuf/timestamp.proto";
     8  import "google/protobuf/duration.proto";
     9  
    10  import "gogoproto/gogo.proto";
    11  
    12  import "client/admin/v1_7/pfs/pfs.proto";
    13  
// Secret references a Kubernetes secret to be exposed to pipeline user code,
// either mounted as a file or injected as an environment variable.
message Secret {
  // Name must be the name of the secret in kubernetes.
  string name = 1;
  // Key of the secret to load into env_var, this field only has meaning if EnvVar != "".
  string key = 4;
  // Path at which the whole secret is mounted inside the worker container.
  string mount_path = 2;
  // Name of the environment variable to expose the secret value through.
  string env_var = 3;
}
    22  
// Transform describes the user code that a pipeline runs over its input.
message Transform {
  // Docker image in which the user code runs.
  string image = 1;
  // Command (argv-style) executed inside the container.
  repeated string cmd = 2;
  // Additional environment variables set in the user container.
  map<string, string> env = 3;
  // Kubernetes secrets made available to the user code (see Secret).
  repeated Secret secrets = 4;
  // Names of Kubernetes image-pull secrets used to fetch 'image' from a
  // private registry.
  repeated string image_pull_secrets = 9;
  // Lines fed to the standard input of cmd.
  repeated string stdin = 5;
  // Exit codes (in addition to 0) that are treated as success.
  repeated int64 accept_return_code = 6;
  // If true, enable more verbose worker logging.
  bool debug = 7;
  // User the code runs as inside the container -- presumably a UNIX
  // user/UID string; confirm against worker implementation.
  string user = 10;
  // Working directory in which cmd is invoked.
  string working_dir = 11;
}
    35  
// Egress tells a pipeline to push its output to an external URL (e.g. an
// object store) in addition to PFS.
message Egress {
  string URL = 1;
}

// Job identifies a single run of a pipeline.
message Job {
  string id = 1 [(gogoproto.customname) = "ID"];
}

// JobState is the lifecycle state of a Job.
// NOTE(review): values are unprefixed and JOB_STARTING doubles as the zero
// value; kept as-is for 1.7 wire/codegen compatibility.
enum JobState {
  JOB_STARTING = 0;
  JOB_RUNNING = 1;
  JOB_FAILURE = 2;
  JOB_SUCCESS = 3;
  JOB_KILLED = 4;
}

// Service configures a pipeline that runs as a long-lived service rather
// than processing discrete jobs.
message Service {
  // Port the user code listens on inside the pod.
  int32 internal_port = 1;
  // Port exposed outside the cluster.
  int32 external_port = 2;
  // IP of the service -- presumably assigned by Kubernetes; confirm.
  string ip = 3 [(gogoproto.customname) = "IP"];
}
    57  
// AtomInput is a PFS-repo input to a pipeline (called "pfs input" in later
// Pachyderm versions).
message AtomInput {
  reserved 7;
  // Name under which this input's data appears to the user code.
  string name = 1;
  // PFS repo to read from.
  string repo = 2;
  // Branch of 'repo' to subscribe to.
  string branch = 3;
  // Specific commit to read (used for jobs; pipelines follow 'branch').
  string commit = 4;
  // Glob pattern used to partition the input into datums.
  string glob = 5;
  // If true, files are downloaded lazily (on first read) rather than eagerly.
  bool lazy = 6;
  // EmptyFiles, if true, will cause files from this atom to be presented as
  // empty files. This is useful in shuffle pipelines where you want to read
  // the names of files and reorganize them using symlinks.
  bool empty_files = 8;
}
    71  
// CronInput triggers a pipeline on a schedule by committing to a repo that
// the system writes to periodically.
message CronInput {
  // Name under which this input appears to the user code.
  string name = 1;
  // Repo that receives the scheduled commits.
  string repo = 2;
  // Specific commit to read (used for jobs).
  string commit = 3;
  // Schedule in cron syntax.
  string spec = 4;
  // Time from which the schedule is computed.
  google.protobuf.Timestamp start = 5;
}
    79  
// GitInput triggers a pipeline from pushes to a git repository (via the
// Pachyderm githook endpoint).
message GitInput {
  // Name under which this input appears to the user code.
  string name = 1;
  // URL of the git repository.
  string url = 2 [(gogoproto.customname) = "URL"];
  // Git branch to watch.
  string branch = 3;
  // Specific commit to read (used for jobs).
  string commit = 4;
}
    86  
// Input is the recursive input spec of a pipeline: exactly one of the
// fields below is expected to be set (a de-facto union; not a oneof, kept
// as-is for 1.7 compatibility).
message Input {
  AtomInput atom = 1;
  // Cross product of the given inputs.
  repeated Input cross = 2;
  // Union of the given inputs.
  repeated Input union = 3;
  CronInput cron = 4;
  GitInput git = 5;
}
    94  
// JobInput is a resolved input of a single job: a concrete commit rather
// than a branch subscription.
message JobInput {
  string name = 4;
  // The exact commit this job reads.
  pfs_1_7.Commit commit = 1;
  // Glob pattern used to partition the commit into datums.
  string glob = 2;
  // If true, files are downloaded lazily rather than eagerly.
  bool lazy = 3;
}
   101  
// ParallelismSpec controls how many worker pods a pipeline (or job) runs
// with. Set exactly one of 'constant' or 'coefficient'.
message ParallelismSpec {
  reserved 1;

  // Starts the pipeline/job with a 'constant' workers, unless 'constant' is
  // zero. If 'constant' is zero (which is the zero value of ParallelismSpec),
  // then Pachyderm will choose the number of workers that is started,
  // (currently it chooses the number of workers in the cluster)
  uint64 constant = 2;

  // Starts the pipeline/job with number of workers equal to 'coefficient' * N,
  // where N is the number of nodes in the kubernetes cluster.
  //
  // For example, if each Kubernetes node has four CPUs, you might set
  // 'coefficient' to four, so that there are four Pachyderm workers per
  // Kubernetes node, and each Pachyderm worker gets one CPU. If you want to
  // reserve half the nodes in your cluster for other tasks, you might set
  // 'coefficient' to 0.5.
  double coefficient = 3;
}
   121  
// HashtreeSpec sets the number of shards into which pps splits a pipeline's
// output commits (sharded commits are implemented in Pachyderm 1.8+ only)
message HashtreeSpec {
  // Fixed number of output shards.
  uint64 constant = 1;
}
   127  
// InputFile identifies one input file of a datum, by path and content hash.
message InputFile {
  // This file's absolute path within its pfs repo.
  string path = 4;

  // This file's hash
  bytes hash = 5;
}
   135  
// Datum is the unit of work of a job: one combination of input files.
message Datum {
  // ID is the hash computed from all the files
  string id = 1 [(gogoproto.customname) = "ID"];
  // The job that processed (or is processing) this datum.
  Job job = 2;
}

// DatumState is the processing state of a Datum.
// NOTE(review): values are unprefixed and FAILED is the zero value; kept
// as-is for 1.7 wire/codegen compatibility.
enum DatumState {
    FAILED = 0;
    SUCCESS = 1;
    SKIPPED = 2;
    STARTING = 3;
}

// DatumInfo is the full inspection result for a single datum.
message DatumInfo {
  Datum datum = 1;
  DatumState state = 2;
  // Download/process/upload time and byte counts for this datum.
  ProcessStats stats = 3;
  // PFS location of this datum's stats output -- TODO(review): confirm.
  pfs_1_7.File pfs_state = 4;
  // The input files that make up this datum.
  repeated pfs_1_7.FileInfo data = 5;
}
   156  
// Aggregate summarizes a distribution of per-datum measurements.
message Aggregate {
  // Number of samples aggregated.
  int64 count = 1;
  double mean = 2;
  double stddev = 3;
  // 5th-percentile value of the distribution.
  double fifth_percentile = 4;
  // 95th-percentile value of the distribution.
  double ninety_fifth_percentile = 5;
}
   164  
// ProcessStats records timing and data volume for processing one datum (or,
// summed, for a whole job).
message ProcessStats {
  // Time spent downloading input data.
  google.protobuf.Duration download_time = 1;
  // Time spent running the user code.
  google.protobuf.Duration process_time = 2;
  // Time spent uploading output data.
  google.protobuf.Duration upload_time = 3;
  uint64 download_bytes = 4;
  uint64 upload_bytes = 5;
}

// AggregateProcessStats is ProcessStats aggregated across many datums; each
// field is the distribution of the corresponding ProcessStats field.
message AggregateProcessStats {
  Aggregate download_time = 1;
  Aggregate process_time = 2;
  Aggregate upload_time = 3;
  Aggregate download_bytes = 4;
  Aggregate upload_bytes = 5;
}
   180  
// WorkerStatus describes what a single worker pod is currently doing.
message WorkerStatus {
  string worker_id = 1 [(gogoproto.customname) = "WorkerID"];
  string job_id = 2 [(gogoproto.customname) = "JobID"];
  // Input files of the datum currently being processed.
  repeated InputFile data = 3;
  // Started is the time processing on the current datum began.
  google.protobuf.Timestamp started = 4;
  ProcessStats stats = 5;
  // Number of datums waiting in this worker's queue.
  int64 queue_size = 6;
}
   190  
// ResourceSpec describes the amount of resources that pipeline pods should
// request from kubernetes, for scheduling.
message ResourceSpec {
  // The number of CPUs each worker needs (partial values are allowed, and
  // encouraged)
  float cpu = 1;

  // The amount of memory each worker needs (in bytes, with allowed
  // SI suffixes (M, K, G, Mi, Ki, Gi, etc).
  string memory = 2;

  // The number of GPUs each worker needs.
  int64 gpu = 3;

  // The amount of ephemeral storage each worker needs (in bytes, with allowed
  // SI suffixes (M, K, G, Mi, Ki, Gi, etc).
  string disk = 4;
}
   209  
// EtcdJobInfo is the portion of the JobInfo that gets stored in etcd during
// job execution. It contains fields which change over the lifetime of the job
// but aren't used in the execution of the job.
message EtcdJobInfo {
  Job job = 1;
  Pipeline pipeline = 2;
  // Commit that will contain this job's output.
  pfs_1_7.Commit output_commit = 3;
  // Job restart count (e.g. due to datum failure)
  uint64 restart = 4;

  // Counts of how many times we processed or skipped a datum
  int64 data_processed = 5;
  int64 data_skipped = 6;
  int64 data_total = 7;
  int64 data_failed = 8;

  // Download/process/upload time and download/upload bytes
  ProcessStats stats = 9;

  // Commit holding this job's stats output (when stats are enabled).
  pfs_1_7.Commit stats_commit = 10;
  JobState state = 11;
  // Human-readable explanation of 'state' (e.g. failure reason).
  string reason = 12;
  google.protobuf.Timestamp started = 13;
  google.protobuf.Timestamp finished = 14;
}
   235  
// JobInfo is the full inspection result for a job: its spec (copied from the
// pipeline at creation time) plus its runtime state and statistics.
message JobInfo {
  reserved 4, 5;
  Job job = 1;
  // Spec fields, snapshotted from the pipeline that created this job.
  Transform transform = 2;
  Pipeline pipeline = 3;
  // Version of the pipeline that ran this job.
  uint64 pipeline_version = 13;
  ParallelismSpec parallelism_spec = 12;
  Egress egress = 15;
  // Parent job -- presumably the previous job on the same pipeline whose
  // results this (incremental) job builds on; confirm.
  Job parent_job = 6;
  google.protobuf.Timestamp started = 7;
  google.protobuf.Timestamp finished = 8;
  // Commit containing this job's output.
  pfs_1_7.Commit output_commit = 9;
  JobState state = 10;
  string reason = 35;  // reason explains why the job is in the current state
  Service service = 14;
  pfs_1_7.Repo output_repo = 18;
  string output_branch = 17;
  // Restart count (e.g. due to datum failure).
  uint64 restart = 20;
  // Datum counts by outcome.
  int64 data_processed = 22;
  int64 data_skipped = 30;
  int64 data_failed = 40;
  int64 data_total = 23;
  // Aggregated download/process/upload stats.
  ProcessStats stats = 31;
  // Live status of each worker pod.
  repeated WorkerStatus worker_status = 24;
  ResourceSpec resource_requests = 25;
  ResourceSpec resource_limits = 36;
  Input input = 26;
  pfs_1_7.BranchInfo new_branch = 27;
  bool incremental = 28;
  // Commit holding this job's stats output (when enable_stats is true).
  pfs_1_7.Commit stats_commit = 29;
  bool enable_stats = 32;
  string salt = 33;
  bool batch = 34;
  ChunkSpec chunk_spec = 37;
  // Per-datum and whole-job execution timeouts.
  google.protobuf.Duration datum_timeout = 38;
  google.protobuf.Duration job_timeout = 39;
  // Number of attempts per datum before it is marked failed.
  int64 datum_tries = 41;
  SchedulingSpec scheduling_spec = 42;
  // Raw Kubernetes pod-spec override (JSON) -- TODO(review): confirm format.
  string pod_spec = 43;
}
   276  
// WorkerState is the Kubernetes-pod-level state of a worker.
enum WorkerState {
  POD_RUNNING = 0;
  POD_SUCCESS = 1;
  POD_FAILED = 2;
}

// Worker pairs a worker pod's name with its state.
message Worker {
  string name = 1;
  WorkerState state = 2;
}

// JobInfos is a list of JobInfo (response of ListJob).
message JobInfos {
  repeated JobInfo job_info = 1;
}

// Pipeline identifies a pipeline by name.
message Pipeline {
  string name = 1;
}
   295  
// PipelineInput is a legacy (pre-Input) pipeline input spec; retained for
// 1.7 compatibility.
message PipelineInput {
  // Name under which this input appears to the user code.
  string name = 5;
  pfs_1_7.Repo repo = 1;
  string branch = 2;
  // Glob pattern used to partition the input into datums.
  string glob = 3;
  bool lazy = 4;
  // Commit to process from -- presumably an exclusive lower bound; confirm.
  pfs_1_7.Commit from = 6;
}
   304  
// PipelineState is the lifecycle state of a pipeline.
enum PipelineState {
  // When the pipeline is not ready to be triggered by commits.
  // This happens when either 1) a pipeline has been created but not
  // yet picked up by a PPS server, or 2) the pipeline does not have
  // any inputs and is meant to be triggered manually
  PIPELINE_STARTING = 0;
  // After this pipeline is picked up by a pachd node.  This is the normal
  // state of a pipeline.
  PIPELINE_RUNNING = 1;
  // After some error caused runPipeline to exit, but before the
  // pipeline is re-run.  This is when the exponential backoff is
  // in effect.
  PIPELINE_RESTARTING = 2;
  // We have retried too many times and we have given up on this pipeline.
  PIPELINE_FAILURE = 3;
  // The pipeline has been explicitly paused by the user.
  PIPELINE_PAUSED = 4;
  // The pipeline is fully functional, but there are no commits to process.
  PIPELINE_STANDBY = 5;
}
   325  
// EtcdPipelineInfo is proto that Pachd stores in etcd for each pipeline. It
// tracks the state of the pipeline, and points to its metadata in PFS (and,
// by pointing to a PFS commit, de facto tracks the pipeline's version)
message EtcdPipelineInfo {
  PipelineState state = 1;
  // Human-readable explanation of 'state' (e.g. failure reason).
  string reason = 4;
  // Commit in the pipeline's spec repo holding the current pipeline spec.
  pfs_1_7.Commit spec_commit = 2;
  // Number of jobs per state; keys are JobState values as int32.
  map<int32, int32> job_counts = 3;
  // Auth token the pipeline uses to talk to pachd (when auth is enabled).
  string auth_token = 5;
}
   336  
// PipelineInfo is the full inspection result for a pipeline: its spec plus
// runtime state (some fields are filled in by InspectPipeline rather than
// stored in PFS; see comments below).
message PipelineInfo {
  reserved 3, 4, 26;
  string id = 17 [(gogoproto.customname) = "ID"];
  Pipeline pipeline = 1;
  // Version, incremented on each pipeline update.
  uint64 version = 11;
  Transform transform = 2;
  ParallelismSpec parallelism_spec = 10;
  HashtreeSpec hashtree_spec = 42;
  Egress egress = 15;
  google.protobuf.Timestamp created_at = 6;

  // state indicates the current state of the pipeline. This is not stored in
  // PFS along with the rest of this data structure--PPS.InspectPipeline fills
  // it in
  PipelineState state = 7;
  // same for stopped field
  bool stopped = 38;
  string recent_error = 8;

  // job_counts indicates the number of jobs within this pipeline in a given
  // state. This is not stored in PFS along with the rest of this data
  // structure--PPS.InspectPipeline fills it in
  map<int32, int32> job_counts = 9;
  string output_branch = 16;
  // Idle time after which a running pipeline is scaled down.
  google.protobuf.Duration scale_down_threshold = 18;
  ResourceSpec resource_requests = 19;
  ResourceSpec resource_limits = 31;
  Input input = 20;
  string description = 21;
  bool incremental = 22;
  // Cache size for the pipeline's workers -- TODO(review): confirm units.
  string cache_size = 23;
  bool enable_stats = 24;
  // Salt mixed into datum hashes; changing it forces reprocessing.
  string salt = 25;
  bool batch = 27;

  // reason includes any error messages associated with a failed pipeline
  string reason = 28;
  int64 max_queue_size = 29;
  Service service = 30;
  ChunkSpec chunk_spec = 32;
  google.protobuf.Duration datum_timeout = 33;
  google.protobuf.Duration job_timeout = 34;
  // URL of the githook endpoint that GitInputs push to.
  string githook_url = 35 [(gogoproto.customname) = "GithookURL"];
  // Commit in the spec repo holding this pipeline's current spec.
  pfs_1_7.Commit spec_commit = 36;
  bool standby = 37;
  int64 datum_tries = 39;
  SchedulingSpec scheduling_spec = 40;
  // Raw Kubernetes pod-spec override (JSON) -- TODO(review): confirm format.
  string pod_spec = 41;
}
   386  
// PipelineInfos is a list of PipelineInfo (response of ListPipeline).
message PipelineInfos {
  repeated PipelineInfo pipeline_info = 1;
}
   390  
// CreateJobRequest creates a job for an existing pipeline and output commit.
// Most historical fields have been removed; their numbers are reserved so
// they cannot be reused.
message CreateJobRequest {
  reserved 3, 4, 1, 10, 7, 9, 8, 12, 11, 13, 14, 21, 15, 16, 17, 18, 19, 20, 22, 23, 24;
  Pipeline pipeline = 2;
  pfs_1_7.Commit output_commit = 25;
}
   396  
// InspectJobRequest identifies the job to inspect.
message InspectJobRequest {
  // Callers should set either Job or OutputCommit, not both.
  Job job = 1;
  pfs_1_7.Commit output_commit = 3;
  bool block_state = 2; // block until state is either JOB_FAILURE or JOB_SUCCESS
}
   403  
// ListJobRequest filters the jobs returned by ListJob / ListJobStream.
message ListJobRequest {
  Pipeline pipeline = 1; // nil means all pipelines
  repeated pfs_1_7.Commit input_commit = 2; // nil means all inputs
  pfs_1_7.Commit output_commit = 3;
}

// FlushJobRequest blocks until the jobs downstream of 'commits' (optionally
// limited to 'to_pipelines') have finished.
message FlushJobRequest {
  repeated pfs_1_7.Commit commits = 1;
  repeated Pipeline to_pipelines = 2;
}

// DeleteJobRequest identifies the job to delete.
message DeleteJobRequest {
  Job job = 1;
}

// StopJobRequest identifies the job to stop.
message StopJobRequest {
  Job job = 1;
}
   422  
// GetLogsRequest selects which worker logs GetLogs streams back.
message GetLogsRequest {
  reserved 4;
  // The pipeline from which we want to get logs (required if the job in 'job'
  // was created as part of a pipeline. To get logs from a non-orphan job
  // without the pipeline that created it, you need to use ElasticSearch).
  Pipeline pipeline = 2;

  // The job from which we want to get logs.
  Job job = 1;

  // Names of input files from which we want processing logs. This may contain
  // multiple files, to query pipelines that contain multiple inputs. Each
  // filter may be an absolute path of a file within a pps repo, or it may be
  // a hash for that file (to search for files at specific versions)
  repeated string data_filters = 3;

  // If set, only return logs from processing this datum.
  Datum datum = 6;

  // If true get logs from the master process
  bool master = 5;

  // Continue to follow new logs as they become available.
  bool follow = 7;

  // If nonzero, the number of lines from the end of the logs to return.  Note:
  // tail applies per container, so you will get tail * <number of pods> total
  // lines back.
  int64 tail = 8;
}
   452  
// LogMessage is a log line from a PPS worker, annotated with metadata
// indicating when and why the line was logged.
message LogMessage {
  // The job and pipeline for which a PFS file is being processed (if the job
  // is an orphan job, pipeline name and ID will be unset)
  string pipeline_name = 1;
  string job_id = 3 [(gogoproto.customname) = "JobID"];
  string worker_id = 7 [(gogoproto.customname) = "WorkerID"];
  string datum_id = 9 [(gogoproto.customname) = "DatumID"];
  // True if the line came from the master process rather than a worker.
  bool master = 10;

  // The PFS files being processed (one per pipeline/job input)
  repeated InputFile data = 4;

  // User is true if log message comes from the users code.
  bool user = 8;

  // The message logged, and the time at which it was logged
  google.protobuf.Timestamp ts = 5;
  string message = 6;
}
   474  
// RestartDatumRequest re-runs specific datums of a job; data_filters select
// the datums (by input-file path or hash, as in GetLogsRequest).
message RestartDatumRequest {
  Job job = 1;
  repeated string data_filters = 2;
}

// InspectDatumRequest identifies the datum to inspect.
message InspectDatumRequest {
  Datum datum = 1;
}

// ListDatumRequest lists a job's datums, paginated.
message ListDatumRequest {
  Job job = 1;
  // Number of datums per page (0 presumably means no pagination; confirm).
  int64 page_size = 2;
  // Zero-based page to return -- TODO(review): confirm indexing.
  int64 page = 3;
}

// ListDatumResponse is one page of a job's datums.
message ListDatumResponse {
  repeated DatumInfo datum_infos = 1;
  int64 total_pages = 2;
  // The page this response contains.
  int64 page = 3;
}
   495  
// ListDatumStreamResponse is identical to ListDatumResponse, except that only
// one DatumInfo is present (as these responses are streamed)
message ListDatumStreamResponse {
  DatumInfo datum_info = 1;
  // total_pages is only set in the first response (and set to 0 in all other
  // responses)
  int64 total_pages = 2;
  // page is only set in the first response (and set to 0 in all other
  // responses)
  int64 page = 3;
}
   507  
// ChunkSpec specifies how a pipeline should chunk its datums.
message ChunkSpec {
  // number, if nonzero, specifies that each chunk should contain `number`
  // datums. Chunks may contain fewer if the total number of datums don't
  // divide evenly.
  int64 number = 1;
  // size_bytes, if nonzero, specifies a target size for each chunk of datums.
  // Chunks may be larger or smaller than size_bytes, but will usually be
  // pretty close to size_bytes in size.
  int64 size_bytes = 2;
}
   519  
// SchedulingSpec constrains where Kubernetes schedules a pipeline's worker
// pods.
message SchedulingSpec {
  // Kubernetes node selector (label key/value pairs) for worker pods.
  map<string, string> node_selector = 1;
  // Kubernetes priority class assigned to worker pods.
  string priority_class_name = 2;
}
   524  
// CreatePipelineRequest creates (or, with 'update', replaces) a pipeline.
// Field meanings mirror the corresponding PipelineInfo fields.
message CreatePipelineRequest {
  reserved 3, 4;
  Pipeline pipeline = 1;
  Transform transform = 2;
  ParallelismSpec parallelism_spec = 7;
  HashtreeSpec hashtree_spec = 31;
  Egress egress = 9;
  // If true, replace an existing pipeline of the same name.
  bool update = 5;
  string output_branch = 10;
  // Idle time after which a running pipeline is scaled down.
  google.protobuf.Duration scale_down_threshold = 11;
  ResourceSpec resource_requests = 12;
  ResourceSpec resource_limits = 22;
  Input input = 13;
  string description = 14;
  bool incremental = 15;
  // Cache size for the pipeline's workers -- TODO(review): confirm units.
  string cache_size = 16;
  bool enable_stats = 17;
  // Reprocess forces the pipeline to reprocess all datums.
  // It only has meaning if Update is true
  bool reprocess = 18;
  bool batch = 19;
  int64 max_queue_size = 20;
  Service service = 21;
  ChunkSpec chunk_spec = 23;
  // Per-datum and whole-job execution timeouts.
  google.protobuf.Duration datum_timeout = 24;
  google.protobuf.Duration job_timeout = 25;
  // Salt mixed into datum hashes; changing it forces reprocessing.
  string salt = 26;
  bool standby = 27;
  // Number of attempts per datum before it is marked failed.
  int64 datum_tries = 28;
  SchedulingSpec scheduling_spec = 29;
  // Raw Kubernetes pod-spec override (JSON) -- TODO(review): confirm format.
  string pod_spec = 30;
}
   557  
// InspectPipelineRequest identifies the pipeline to inspect.
message InspectPipelineRequest {
  Pipeline pipeline = 1;
}

// ListPipelineRequest has no filters in 1.7; all pipelines are returned.
message ListPipelineRequest {
}

// DeletePipelineRequest deletes one pipeline, or all of them.
message DeletePipelineRequest {
  reserved 2, 3;
  Pipeline pipeline = 1;
  // If true, delete every pipeline ('pipeline' is ignored).
  bool all = 4;
  // If true, delete even if it would leave dangling state.
  bool force = 5;
}

// StartPipelineRequest resumes a stopped pipeline.
message StartPipelineRequest {
  Pipeline pipeline = 1;
}

// StopPipelineRequest pauses a running pipeline.
message StopPipelineRequest {
  Pipeline pipeline = 1;
}

// RerunPipelineRequest re-runs a pipeline, optionally excluding/including
// specific input commits.
message RerunPipelineRequest {
  Pipeline pipeline = 1;
  repeated pfs_1_7.Commit exclude = 2;
  repeated pfs_1_7.Commit include = 3;
}
   585  
// GarbageCollectRequest triggers a cluster garbage-collection run.
message GarbageCollectRequest {
    // Memory is how much memory to use in computing which objects are alive. A
    // larger number will result in more precise garbage collection (at the
    // cost of more memory usage).
    int64 memory_bytes = 1;
}
// GarbageCollectResponse is empty; success is conveyed by the RPC status.
message GarbageCollectResponse {}

// ActivateAuthRequest/Response carry no fields; see API.ActivateAuth.
message ActivateAuthRequest {}
message ActivateAuthResponse {}
   596  
// API is the PPS (Pachyderm Pipeline System) service: job, datum, and
// pipeline management, log retrieval, and cluster maintenance.
service API {
  rpc CreateJob(CreateJobRequest) returns (Job) {}
  rpc InspectJob(InspectJobRequest) returns (JobInfo) {}
  // ListJob returns information about current and past Pachyderm jobs. This is
  // deprecated in favor of ListJobStream
  rpc ListJob(ListJobRequest) returns (JobInfos) {}
  // ListJobStream returns information about current and past Pachyderm jobs.
  rpc ListJobStream(ListJobRequest) returns (stream JobInfo) {}
  // FlushJob blocks until the jobs downstream of the given commits finish,
  // streaming each finished JobInfo.
  rpc FlushJob(FlushJobRequest) returns (stream JobInfo) {}
  rpc DeleteJob(DeleteJobRequest) returns (google.protobuf.Empty) {}
  rpc StopJob(StopJobRequest) returns (google.protobuf.Empty) {}
  rpc InspectDatum(InspectDatumRequest) returns (DatumInfo) {}
  // ListDatum returns information about each datum fed to a Pachyderm job. This
  // is deprecated in favor of ListDatumStream
  rpc ListDatum(ListDatumRequest) returns (ListDatumResponse) {}
  // ListDatumStream returns information about each datum fed to a Pachyderm job
  rpc ListDatumStream(ListDatumRequest) returns (stream ListDatumStreamResponse) {}
  rpc RestartDatum(RestartDatumRequest) returns (google.protobuf.Empty) {}

  rpc CreatePipeline(CreatePipelineRequest) returns (google.protobuf.Empty) {}
  rpc InspectPipeline(InspectPipelineRequest) returns (PipelineInfo) {}
  rpc ListPipeline(ListPipelineRequest) returns (PipelineInfos) {}
  rpc DeletePipeline(DeletePipelineRequest) returns (google.protobuf.Empty) {}
  rpc StartPipeline(StartPipelineRequest) returns (google.protobuf.Empty) {}
  rpc StopPipeline(StopPipelineRequest) returns (google.protobuf.Empty) {}
  rpc RerunPipeline(RerunPipelineRequest) returns (google.protobuf.Empty) {}

  // DeleteAll deletes everything
  rpc DeleteAll(google.protobuf.Empty) returns (google.protobuf.Empty) {}
  // GetLogs streams log lines from pipeline workers (see GetLogsRequest).
  rpc GetLogs(GetLogsRequest) returns (stream LogMessage) {}

  // Garbage collection
  rpc GarbageCollect(GarbageCollectRequest) returns (GarbageCollectResponse) {}

  // An internal call that causes PPS to put itself into an auth-enabled state
  // (all pipelines have tokens, correct permissions, etc.)
  rpc ActivateAuth(ActivateAuthRequest) returns (ActivateAuthResponse) {}
}