// Source: github.com/pachyderm/pachyderm@v1.13.4/src/client/admin/v1_8/pps/pps.proto
//
// NOTE(review): the package name "pps_1_8" and the admin/v1_8 path suggest
// this is a frozen snapshot of the 1.8-era PPS (Pachyderm Pipeline System)
// API, kept so the admin service can deserialize metadata extracted from 1.8
// clusters — confirm before editing. Field names and numbers here are a wire
// contract: do not rename, renumber, or reuse them.
syntax = "proto3";

package pps_1_8;
option go_package = "github.com/pachyderm/pachyderm/src/client/admin/v1_8/pps";

import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/duration.proto";

// gogoproto extensions are used below only for (gogoproto.customname),
// which overrides the generated Go field name.
import "gogoproto/gogo.proto";

// Matching frozen 1.8 snapshot of the PFS types (package pfs_1_8).
import "client/admin/v1_8/pfs/pfs.proto";

// Secret points at a kubernetes secret and says where to expose it to a
// pipeline (as a mounted path and/or an environment variable).
message Secret {
  // Name must be the name of the secret in kubernetes.
  string name = 1;
  // Key of the secret to load into env_var, this field only has meaning if
  // EnvVar != "".
  string key = 4;
  string mount_path = 2;
  string env_var = 3;
}

// Transform describes the user code a pipeline runs.
message Transform {
  string image = 1;
  repeated string cmd = 2;
  map<string, string> env = 3;
  repeated Secret secrets = 4;
  repeated string image_pull_secrets = 9;
  repeated string stdin = 5;
  repeated int64 accept_return_code = 6;
  bool debug = 7;
  string user = 10;
  string working_dir = 11;
  string dockerfile = 12;
}

message Egress {
  // NOTE(review): capitalized field name is part of the original 1.8 schema;
  // renaming it would change the JSON name, so it is kept as-is.
  string URL = 1;
}

message Job {
  // (gogoproto.customname) makes the generated Go field "ID".
  string id = 1 [(gogoproto.customname) = "ID"];
}

enum JobState {
  JOB_STARTING = 0;
  JOB_RUNNING = 1;
  JOB_FAILURE = 2;
  JOB_SUCCESS = 3;
  JOB_KILLED = 4;
  JOB_MERGING = 5;
}

message Service {
  int32 internal_port = 1;
  int32 external_port = 2;
  string ip = 3 [(gogoproto.customname) = "IP"];
}

message Spout {
  bool overwrite = 1;
}

// Note: this is deprecated and replaced by `PfsInput`
message AtomInput {
  reserved 7;
  string name = 1;
  string repo = 2;
  string branch = 3;
  string commit = 4;
  string glob = 5;
  bool lazy = 6;
  // EmptyFiles, if true, will cause files from this atom to be presented as
  // empty files. This is useful in shuffle pipelines where you want to read
  // the names of files and reorganize them using symlinks.
  bool empty_files = 8;
}

message PFSInput {
  string name = 1;
  string repo = 2;
  string branch = 3;
  string commit = 4;
  string glob = 5;
  bool lazy = 6;
  // EmptyFiles, if true, will cause files from this PFS input to be
  // presented as empty files. This is useful in shuffle pipelines where you
  // want to read the names of files and reorganize them using symlinks.
  bool empty_files = 7;
}

message CronInput {
  string name = 1;
  string repo = 2;
  string commit = 3;
  string spec = 4;
  // Overwrite, if true, will expose a single datum that gets overwritten each
  // tick. If false, it will create a new datum for each tick.
  bool overwrite = 6;
  google.protobuf.Timestamp start = 5;
}

message GitInput {
  string name = 1;
  string url = 2 [(gogoproto.customname) = "URL"];
  string branch = 3;
  string commit = 4;
}

// Input is a recursive union-like type: exactly one of the leaf inputs (atom,
// pfs, cron, git) or the combinators (cross, union) is expected to be set.
// NOTE(review): not an actual `oneof` in the 1.8 schema — kept as-is.
message Input {
  // Note: this is deprecated and replaced by `PfsInput`
  AtomInput atom = 1;
  PFSInput pfs = 6;
  repeated Input cross = 2;
  repeated Input union = 3;
  CronInput cron = 4;
  GitInput git = 5;
}

message JobInput {
  string name = 4;
  pfs_1_8.Commit commit = 1;
  string glob = 2;
  bool lazy = 3;
}

message ParallelismSpec {
  reserved 1;

  // Starts the pipeline/job with 'constant' workers, unless 'constant' is
  // zero. If 'constant' is zero (which is the zero value of ParallelismSpec),
  // then Pachyderm will choose the number of workers that is started
  // (currently it chooses the number of workers in the cluster).
  uint64 constant = 2;

  // Starts the pipeline/job with number of workers equal to 'coefficient' * N,
  // where N is the number of nodes in the kubernetes cluster.
  //
  // For example, if each Kubernetes node has four CPUs, you might set
  // 'coefficient' to four, so that there are four Pachyderm workers per
  // Kubernetes node, and each Pachyderm worker gets one CPU. If you want to
  // reserve half the nodes in your cluster for other tasks, you might set
  // 'coefficient' to 0.5.
  double coefficient = 3;
}

// HashtreeSpec sets the number of shards into which pps splits a pipeline's
// output commits (sharded commits are implemented in Pachyderm 1.8+ only)
message HashtreeSpec {
  uint64 constant = 1;
}

message InputFile {
  // This file's absolute path within its pfs repo.
  string path = 4;

  // This file's hash
  bytes hash = 5;
}

message Datum {
  // ID is the hash computed from all the files
  string id = 1 [(gogoproto.customname) = "ID"];
  Job job = 2;
}

enum DatumState {
  // NOTE(review): unlike the other enums in this file, these values carry no
  // type prefix; they are kept unprefixed for wire/codegen compatibility.
  FAILED = 0;
  SUCCESS = 1;
  SKIPPED = 2;
  STARTING = 3;
}

message DatumInfo {
  Datum datum = 1;
  DatumState state = 2;
  ProcessStats stats = 3;
  pfs_1_8.File pfs_state = 4;
  repeated pfs_1_8.FileInfo data = 5;
}

// Aggregate holds summary statistics over a set of per-datum measurements.
message Aggregate {
  int64 count = 1;
  double mean = 2;
  double stddev = 3;
  double fifth_percentile = 4;
  double ninety_fifth_percentile = 5;
}

message ProcessStats {
  google.protobuf.Duration download_time = 1;
  google.protobuf.Duration process_time = 2;
  google.protobuf.Duration upload_time = 3;
  uint64 download_bytes = 4;
  uint64 upload_bytes = 5;
}

// AggregateProcessStats mirrors ProcessStats, with each measurement replaced
// by its Aggregate summary.
message AggregateProcessStats {
  Aggregate download_time = 1;
  Aggregate process_time = 2;
  Aggregate upload_time = 3;
  Aggregate download_bytes = 4;
  Aggregate upload_bytes = 5;
}

message WorkerStatus {
  string worker_id = 1 [(gogoproto.customname) = "WorkerID"];
  string job_id = 2 [(gogoproto.customname) = "JobID"];
  repeated pps_1_8.InputFile data = 3;

  // Started is the time processing on the current datum began.
  google.protobuf.Timestamp started = 4;
  ProcessStats stats = 5;
  int64 queue_size = 6;
}

// ResourceSpec describes the amount of resources that pipeline pods should
// request from kubernetes, for scheduling.
message ResourceSpec {
  reserved 3;

  // The number of CPUs each worker needs (partial values are allowed, and
  // encouraged).
  float cpu = 1;

  // The amount of memory each worker needs (in bytes, with allowed
  // SI suffixes: M, K, G, Mi, Ki, Gi, etc.).
  string memory = 2;

  // The spec for GPU resources.
  GPUSpec gpu = 5;

  // The amount of ephemeral storage each worker needs (in bytes, with allowed
  // SI suffixes: M, K, G, Mi, Ki, Gi, etc.).
  string disk = 4;
}

message GPUSpec {
  // The type of GPU (nvidia.com/gpu or amd.com/gpu for example).
  string type = 1;
  // The number of GPUs to request.
  int64 number = 2;
}

// EtcdJobInfo is the portion of the JobInfo that gets stored in etcd during
// job execution. It contains fields which change over the lifetime of the job
// but aren't used in the execution of the job.
message EtcdJobInfo {
  Job job = 1;
  Pipeline pipeline = 2;
  pfs_1_8.Commit output_commit = 3;
  // Job restart count (e.g. due to datum failure)
  uint64 restart = 4;

  // Counts of how many times we processed or skipped a datum
  int64 data_processed = 5;
  int64 data_skipped = 6;
  int64 data_total = 7;
  int64 data_failed = 8;

  // Download/process/upload time and download/upload bytes
  ProcessStats stats = 9;

  pfs_1_8.Commit stats_commit = 10;
  JobState state = 11;
  string reason = 12;
  google.protobuf.Timestamp started = 13;
  google.protobuf.Timestamp finished = 14;
}

// JobInfo is the full description of a job returned by the API.
// NOTE(review): field numbers are intentionally non-sequential (fields were
// appended over time); do not renumber.
message JobInfo {
  reserved 4, 5, 28;
  Job job = 1;
  Transform transform = 2;
  Pipeline pipeline = 3;
  uint64 pipeline_version = 13;
  ParallelismSpec parallelism_spec = 12;
  Egress egress = 15;
  Job parent_job = 6;
  google.protobuf.Timestamp started = 7;
  google.protobuf.Timestamp finished = 8;
  pfs_1_8.Commit output_commit = 9;
  JobState state = 10;
  string reason = 35;  // reason explains why the job is in the current state
  Service service = 14;
  Spout spout = 45;
  pfs_1_8.Repo output_repo = 18;
  string output_branch = 17;
  uint64 restart = 20;
  int64 data_processed = 22;
  int64 data_skipped = 30;
  int64 data_failed = 40;
  int64 data_total = 23;
  ProcessStats stats = 31;
  repeated WorkerStatus worker_status = 24;
  ResourceSpec resource_requests = 25;
  ResourceSpec resource_limits = 36;
  Input input = 26;
  pfs_1_8.BranchInfo new_branch = 27;
  pfs_1_8.Commit stats_commit = 29;
  bool enable_stats = 32;
  string salt = 33;
  bool batch = 34;
  ChunkSpec chunk_spec = 37;
  google.protobuf.Duration datum_timeout = 38;
  google.protobuf.Duration job_timeout = 39;
  int64 datum_tries = 41;
  SchedulingSpec scheduling_spec = 42;
  string pod_spec = 43;
  string pod_patch = 44;
}

enum WorkerState {
  POD_RUNNING = 0;
  POD_SUCCESS = 1;
  POD_FAILED = 2;
}

message Worker {
  string name = 1;
  WorkerState state = 2;
}
message JobInfos {
  repeated JobInfo job_info = 1;
}

message Pipeline {
  string name = 1;
}

message PipelineInput {
  string name = 5;
  pfs_1_8.Repo repo = 1;
  string branch = 2;
  string glob = 3;
  bool lazy = 4;
  pfs_1_8.Commit from = 6;
}

enum PipelineState {
  // When the pipeline is not ready to be triggered by commits.
  // This happens when either 1) a pipeline has been created but not
  // yet picked up by a PPS server, or 2) the pipeline does not have
  // any inputs and is meant to be triggered manually
  PIPELINE_STARTING = 0;
  // After this pipeline is picked up by a pachd node. This is the normal
  // state of a pipeline.
  PIPELINE_RUNNING = 1;
  // After some error caused runPipeline to exit, but before the
  // pipeline is re-run. This is when the exponential backoff is
  // in effect.
  PIPELINE_RESTARTING = 2;
  // We have retried too many times and we have given up on this pipeline.
  PIPELINE_FAILURE = 3;
  // The pipeline has been explicitly paused by the user.
  PIPELINE_PAUSED = 4;
  // The pipeline is fully functional, but there are no commits to process.
  PIPELINE_STANDBY = 5;
}

// EtcdPipelineInfo is the proto that Pachd stores in etcd for each pipeline.
// It tracks the state of the pipeline, and points to its metadata in PFS
// (and, by pointing to a PFS commit, de facto tracks the pipeline's version).
message EtcdPipelineInfo {
  PipelineState state = 1;
  string reason = 4;
  pfs_1_8.Commit spec_commit = 2;
  // Map from JobState (as int32) to the number of jobs in that state.
  map<int32, int32> job_counts = 3;
  string auth_token = 5;
  JobState last_job_state = 6;
}

// PipelineInfo is the full description of a pipeline returned by the API.
// NOTE(review): field numbers are intentionally non-sequential (fields were
// appended over time); do not renumber.
message PipelineInfo {
  reserved 3, 4, 22, 26;
  string id = 17 [(gogoproto.customname) = "ID"];
  Pipeline pipeline = 1;
  uint64 version = 11;
  Transform transform = 2;
  ParallelismSpec parallelism_spec = 10;
  HashtreeSpec hashtree_spec = 42;
  Egress egress = 15;
  google.protobuf.Timestamp created_at = 6;

  // state indicates the current state of the pipeline. This is not stored in
  // PFS along with the rest of this data structure--PPS.InspectPipeline fills
  // it in
  PipelineState state = 7;
  // same for stopped field
  bool stopped = 38;
  string recent_error = 8;

  // job_counts and last_job_state indicates the number of jobs within this
  // pipeline in a given state and the state of the most recently created job,
  // respectively. This is not stored in PFS along with the rest of this data
  // structure--PPS.InspectPipeline fills it in from the EtcdPipelineInfo.
  map<int32, int32> job_counts = 9;
  JobState last_job_state = 43;

  string output_branch = 16;
  google.protobuf.Duration scale_down_threshold = 18;
  ResourceSpec resource_requests = 19;
  ResourceSpec resource_limits = 31;
  Input input = 20;
  string description = 21;
  string cache_size = 23;
  bool enable_stats = 24;
  string salt = 25;
  bool batch = 27;

  // reason includes any error messages associated with a failed pipeline
  string reason = 28;
  int64 max_queue_size = 29;
  Service service = 30;
  Spout spout = 45;
  ChunkSpec chunk_spec = 32;
  google.protobuf.Duration datum_timeout = 33;
  google.protobuf.Duration job_timeout = 34;
  string githook_url = 35 [(gogoproto.customname) = "GithookURL"];
  pfs_1_8.Commit spec_commit = 36;
  bool standby = 37;
  int64 datum_tries = 39;
  SchedulingSpec scheduling_spec = 40;
  string pod_spec = 41;
  string pod_patch = 44;
}

message PipelineInfos {
  repeated PipelineInfo pipeline_info = 1;
}

message CreateJobRequest {
  // Most of the historical CreateJob fields have been removed; their numbers
  // are reserved so they cannot be accidentally reused.
  reserved 3, 4, 1, 10, 7, 9, 8, 12, 11, 13, 14, 21, 15, 16, 17, 18, 19, 20, 22, 23, 24;
  Pipeline pipeline = 2;
  pfs_1_8.Commit output_commit = 25;
}

message InspectJobRequest {
  // Callers should set either Job or OutputCommit, not both.
  Job job = 1;
  pfs_1_8.Commit output_commit = 3;
  // Block until state is either JOB_FAILURE or JOB_SUCCESS.
  bool block_state = 2;
}

message ListJobRequest {
  Pipeline pipeline = 1;                     // nil means all pipelines
  repeated pfs_1_8.Commit input_commit = 2;  // nil means all inputs
  pfs_1_8.Commit output_commit = 3;
}

message FlushJobRequest {
  repeated pfs_1_8.Commit commits = 1;
  repeated Pipeline to_pipelines = 2;
}

message DeleteJobRequest {
  Job job = 1;
}

message StopJobRequest {
  Job job = 1;
}

message GetLogsRequest {
  reserved 4;
  // The pipeline from which we want to get logs (required if the job in 'job'
  // was created as part of a pipeline. To get logs from a non-orphan job
  // without the pipeline that created it, you need to use ElasticSearch).
  Pipeline pipeline = 2;

  // The job from which we want to get logs.
  Job job = 1;

  // Names of input files from which we want processing logs. This may contain
  // multiple files, to query pipelines that contain multiple inputs. Each
  // filter may be an absolute path of a file within a pps repo, or it may be
  // a hash for that file (to search for files at specific versions)
  repeated string data_filters = 3;

  Datum datum = 6;

  // If true get logs from the master process
  bool master = 5;

  // Continue to follow new logs as they become available.
  bool follow = 7;

  // If nonzero, the number of lines from the end of the logs to return. Note:
  // tail applies per container, so you will get tail * <number of pods> total
  // lines back.
  int64 tail = 8;
}

// LogMessage is a log line from a PPS worker, annotated with metadata
// indicating when and why the line was logged.
message LogMessage {
  // The job and pipeline for which a PFS file is being processed (if the job
  // is an orphan job, pipeline name and ID will be unset)
  string pipeline_name = 1;
  string job_id = 3 [(gogoproto.customname) = "JobID"];
  string worker_id = 7 [(gogoproto.customname) = "WorkerID"];
  string datum_id = 9 [(gogoproto.customname) = "DatumID"];
  bool master = 10;

  // The PFS files being processed (one per pipeline/job input)
  repeated InputFile data = 4;

  // User is true if log message comes from the users code.
  bool user = 8;

  // The message logged, and the time at which it was logged
  google.protobuf.Timestamp ts = 5;
  string message = 6;
}

message RestartDatumRequest {
  Job job = 1;
  repeated string data_filters = 2;
}

message InspectDatumRequest {
  Datum datum = 1;
}

message ListDatumRequest {
  Job job = 1;
  int64 page_size = 2;
  int64 page = 3;
}

message ListDatumResponse {
  repeated DatumInfo datum_infos = 1;
  int64 total_pages = 2;
  int64 page = 3;
}

// ListDatumStreamResponse is identical to ListDatumResponse, except that only
// one DatumInfo is present (as these responses are streamed)
message ListDatumStreamResponse {
  DatumInfo datum_info = 1;
  // total_pages is only set in the first response (and set to 0 in all other
  // responses)
  int64 total_pages = 2;
  // page is only set in the first response (and set to 0 in all other
  // responses)
  int64 page = 3;
}

// ChunkSpec specifies how a pipeline should chunk its datums.
message ChunkSpec {
  // number, if nonzero, specifies that each chunk should contain `number`
  // datums. Chunks may contain fewer if the total number of datums don't
  // divide evenly.
  int64 number = 1;
  // size_bytes, if nonzero, specifies a target size for each chunk of datums.
  // Chunks may be larger or smaller than size_bytes, but will usually be
  // pretty close to size_bytes in size.
  int64 size_bytes = 2;
}

message SchedulingSpec {
  map<string, string> node_selector = 1;
  string priority_class_name = 2;
}

// CreatePipelineRequest mirrors PipelineInfo's configurable fields.
// NOTE(review): field numbers are intentionally non-sequential; do not
// renumber.
message CreatePipelineRequest {
  reserved 3, 4, 15;
  Pipeline pipeline = 1;
  Transform transform = 2;
  ParallelismSpec parallelism_spec = 7;
  HashtreeSpec hashtree_spec = 31;
  Egress egress = 9;
  bool update = 5;
  string output_branch = 10;
  google.protobuf.Duration scale_down_threshold = 11;
  ResourceSpec resource_requests = 12;
  ResourceSpec resource_limits = 22;
  Input input = 13;
  string description = 14;
  string cache_size = 16;
  bool enable_stats = 17;
  // Reprocess forces the pipeline to reprocess all datums.
  // It only has meaning if Update is true
  bool reprocess = 18;
  bool batch = 19;
  int64 max_queue_size = 20;
  Service service = 21;
  Spout spout = 33;
  ChunkSpec chunk_spec = 23;
  google.protobuf.Duration datum_timeout = 24;
  google.protobuf.Duration job_timeout = 25;
  string salt = 26;
  bool standby = 27;
  int64 datum_tries = 28;
  SchedulingSpec scheduling_spec = 29;
  string pod_spec = 30;   // deprecated, use pod_patch below
  string pod_patch = 32;  // a json patch will be applied to the pipeline's pod_spec before it's created;
}

message InspectPipelineRequest {
  Pipeline pipeline = 1;
}

message ListPipelineRequest {
}

message DeletePipelineRequest {
  reserved 2, 3;
  Pipeline pipeline = 1;
  bool all = 4;
  bool force = 5;
}

message StartPipelineRequest {
  Pipeline pipeline = 1;
}

message StopPipelineRequest {
  Pipeline pipeline = 1;
}

message RerunPipelineRequest {
  Pipeline pipeline = 1;
  repeated pfs_1_8.Commit exclude = 2;
  repeated pfs_1_8.Commit include = 3;
}

message GarbageCollectRequest {
  // Memory is how much memory to use in computing which objects are alive. A
  // larger number will result in more precise garbage collection (at the
  // cost of more memory usage).
  int64 memory_bytes = 1;
}
message GarbageCollectResponse {}

message ActivateAuthRequest {}
message ActivateAuthResponse {}

// API is the PPS service as of the 1.8 schema snapshot.
service API {
  rpc CreateJob(CreateJobRequest) returns (Job) {}
  rpc InspectJob(InspectJobRequest) returns (JobInfo) {}
  // ListJob returns information about current and past Pachyderm jobs. This is
  // deprecated in favor of ListJobStream
  rpc ListJob(ListJobRequest) returns (JobInfos) {}
  // ListJobStream returns information about current and past Pachyderm jobs.
  rpc ListJobStream(ListJobRequest) returns (stream JobInfo) {}
  rpc FlushJob(FlushJobRequest) returns (stream JobInfo) {}
  rpc DeleteJob(DeleteJobRequest) returns (google.protobuf.Empty) {}
  rpc StopJob(StopJobRequest) returns (google.protobuf.Empty) {}
  rpc InspectDatum(InspectDatumRequest) returns (DatumInfo) {}
  // ListDatum returns information about each datum fed to a Pachyderm job.
  // This is deprecated in favor of ListDatumStream
  rpc ListDatum(ListDatumRequest) returns (ListDatumResponse) {}
  // ListDatumStream returns information about each datum fed to a Pachyderm job
  rpc ListDatumStream(ListDatumRequest) returns (stream ListDatumStreamResponse) {}
  rpc RestartDatum(RestartDatumRequest) returns (google.protobuf.Empty) {}

  rpc CreatePipeline(CreatePipelineRequest) returns (google.protobuf.Empty) {}
  rpc InspectPipeline(InspectPipelineRequest) returns (PipelineInfo) {}
  rpc ListPipeline(ListPipelineRequest) returns (PipelineInfos) {}
  rpc DeletePipeline(DeletePipelineRequest) returns (google.protobuf.Empty) {}
  rpc StartPipeline(StartPipelineRequest) returns (google.protobuf.Empty) {}
  rpc StopPipeline(StopPipelineRequest) returns (google.protobuf.Empty) {}
  rpc RerunPipeline(RerunPipelineRequest) returns (google.protobuf.Empty) {}

  // DeleteAll deletes everything
  rpc DeleteAll(google.protobuf.Empty) returns (google.protobuf.Empty) {}
  rpc GetLogs(GetLogsRequest) returns (stream LogMessage) {}

  // Garbage collection
  rpc GarbageCollect(GarbageCollectRequest) returns (GarbageCollectResponse) {}

  // An internal call that causes PPS to put itself into an auth-enabled state
  // (all pipelines have tokens, correct permissions, etcd)
  rpc ActivateAuth(ActivateAuthRequest) returns (ActivateAuthResponse) {}
}