syntax = "proto3";

// Historical PPS (Pachyderm Pipeline System) schema, version 1.7.
// NOTE(review): given its location under client/admin/v1_7, this appears to
// be a frozen snapshot used to read objects written by Pachyderm 1.7 (e.g.
// during extract/restore) — confirm before editing. Field numbers, field
// names, enum values, and reserved ranges are part of the wire/codegen
// contract and must not be changed, renumbered, or reused.
package pps_1_7;
option go_package = "github.com/pachyderm/pachyderm/src/client/admin/v1_7/pps";

import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/duration.proto";

import "gogoproto/gogo.proto";

import "client/admin/v1_7/pfs/pfs.proto";

// Secret references a Kubernetes secret to expose to pipeline workers,
// either mounted as a file or injected as an environment variable.
message Secret {
  // Name must be the name of the secret in kubernetes.
  string name = 1;
  // Key of the secret to load into env_var; this field only has meaning if
  // env_var != "".
  string key = 4;
  // Filesystem path at which the secret is mounted in the worker container.
  string mount_path = 2;
  // Environment variable that receives the value selected by 'key'.
  string env_var = 3;
}

// Transform describes the user code a pipeline runs over its input.
message Transform {
  string image = 1;
  repeated string cmd = 2;
  map<string, string> env = 3;
  repeated Secret secrets = 4;
  repeated string image_pull_secrets = 9;
  repeated string stdin = 5;
  // Exit codes (in addition to 0) that are treated as success.
  repeated int64 accept_return_code = 6;
  bool debug = 7;
  string user = 10;
  string working_dir = 11;
}

// Egress pushes a pipeline's output to an external URL.
message Egress {
  // NOTE: uppercase field name is frozen (lower_snake_case would be
  // conventional, but renaming breaks JSON/codegen compatibility).
  string URL = 1;
}

// Job identifies a single run of a pipeline.
message Job {
  string id = 1 [(gogoproto.customname) = "ID"];
}

// JobState tracks a job through its lifecycle.
// NOTE: zero value carries business meaning (JOB_STARTING); frozen as-is.
enum JobState {
  JOB_STARTING = 0;
  JOB_RUNNING = 1;
  JOB_FAILURE = 2;
  JOB_SUCCESS = 3;
  JOB_KILLED = 4;
}

// Service configures a pipeline that runs as a long-lived service rather
// than a batch transformation.
message Service {
  int32 internal_port = 1;
  int32 external_port = 2;
  string ip = 3 [(gogoproto.customname) = "IP"];
}

// AtomInput is a PFS-repo input to a pipeline (renamed to PFSInput in later
// Pachyderm versions).
message AtomInput {
  reserved 7;
  string name = 1;
  string repo = 2;
  string branch = 3;
  string commit = 4;
  string glob = 5;
  bool lazy = 6;
  // EmptyFiles, if true, will cause files from this atom to be presented as
  // empty files. This is useful in shuffle pipelines where you want to read
  // the names of files and reorganize them using symlinks.
  bool empty_files = 8;
}

// CronInput triggers a pipeline on a cron schedule.
message CronInput {
  string name = 1;
  string repo = 2;
  string commit = 3;
  // Cron schedule expression.
  string spec = 4;
  google.protobuf.Timestamp start = 5;
}

// GitInput triggers a pipeline from commits to a git repository.
message GitInput {
  string name = 1;
  string url = 2 [(gogoproto.customname) = "URL"];
  string branch = 3;
  string commit = 4;
}

// Input is the (recursive) input spec of a pipeline: exactly one of the
// leaf inputs (atom/cron/git) or one of the combinators (cross/union) is
// expected to be set. NOTE: frozen pre-oneof layout; mutual exclusivity is
// enforced by the application, not the schema.
message Input {
  AtomInput atom = 1;
  repeated Input cross = 2;
  repeated Input union = 3;
  CronInput cron = 4;
  GitInput git = 5;
}

// JobInput is the resolved input of a single job (a concrete commit rather
// than a branch/glob spec).
message JobInput {
  string name = 4;
  pfs_1_7.Commit commit = 1;
  string glob = 2;
  bool lazy = 3;
}

// ParallelismSpec controls how many workers a pipeline/job runs with.
message ParallelismSpec {
  reserved 1;

  // Starts the pipeline/job with a 'constant' workers, unless 'constant' is
  // zero. If 'constant' is zero (which is the zero value of ParallelismSpec),
  // then Pachyderm will choose the number of workers that is started,
  // (currently it chooses the number of workers in the cluster)
  uint64 constant = 2;

  // Starts the pipeline/job with number of workers equal to 'coefficient' * N,
  // where N is the number of nodes in the kubernetes cluster.
  //
  // For example, if each Kubernetes node has four CPUs, you might set
  // 'coefficient' to four, so that there are four Pachyderm workers per
  // Kubernetes node, and each Pachyderm worker gets one CPU. If you want to
  // reserve half the nodes in your cluster for other tasks, you might set
  // 'coefficient' to 0.5.
  double coefficient = 3;
}

// HashtreeSpec sets the number of shards into which pps splits a pipeline's
// output commits (sharded commits are implemented in Pachyderm 1.8+ only)
message HashtreeSpec {
  uint64 constant = 1;
}

// InputFile identifies one input file of a datum.
message InputFile {
  // This file's absolute path within its pfs repo.
  string path = 4;

  // This file's hash
  bytes hash = 5;
}

// Datum is the unit of work a pipeline processes: one combination of input
// files within a job.
message Datum {
  // ID is the hash computed from all the files
  string id = 1 [(gogoproto.customname) = "ID"];
  Job job = 2;
}

// DatumState tracks the processing outcome of one datum.
// NOTE: values are unprefixed and the zero value (FAILED) carries business
// meaning; both are frozen for wire/codegen compatibility.
enum DatumState {
  FAILED = 0;
  SUCCESS = 1;
  SKIPPED = 2;
  STARTING = 3;
}

// DatumInfo is the full inspection record of a datum.
message DatumInfo {
  Datum datum = 1;
  DatumState state = 2;
  ProcessStats stats = 3;
  pfs_1_7.File pfs_state = 4;
  repeated pfs_1_7.FileInfo data = 5;
}

// Aggregate summarizes a distribution of per-datum measurements.
message Aggregate {
  int64 count = 1;
  double mean = 2;
  double stddev = 3;
  double fifth_percentile = 4;
  double ninety_fifth_percentile = 5;
}

// ProcessStats records the time and data volume spent on one unit of work.
message ProcessStats {
  google.protobuf.Duration download_time = 1;
  google.protobuf.Duration process_time = 2;
  google.protobuf.Duration upload_time = 3;
  uint64 download_bytes = 4;
  uint64 upload_bytes = 5;
}

// AggregateProcessStats is ProcessStats aggregated across many datums
// (field-for-field parallel to ProcessStats).
message AggregateProcessStats {
  Aggregate download_time = 1;
  Aggregate process_time = 2;
  Aggregate upload_time = 3;
  Aggregate download_bytes = 4;
  Aggregate upload_bytes = 5;
}

// WorkerStatus reports what a single worker is currently processing.
message WorkerStatus {
  string worker_id = 1 [(gogoproto.customname) = "WorkerID"];
  string job_id = 2 [(gogoproto.customname) = "JobID"];
  repeated InputFile data = 3;
  // Started is the time processing on the current datum began.
  google.protobuf.Timestamp started = 4;
  ProcessStats stats = 5;
  int64 queue_size = 6;
}

// ResourceSpec describes the amount of resources that pipeline pods should
// request from kubernetes, for scheduling.
message ResourceSpec {
  // The number of CPUs each worker needs (partial values are allowed, and
  // encouraged)
  float cpu = 1;

  // The amount of memory each worker needs (in bytes, with allowed
  // SI suffixes (M, K, G, Mi, Ki, Gi, etc).
  string memory = 2;

  // The number of GPUs each worker needs.
  int64 gpu = 3;

  // The amount of ephemeral storage each worker needs (in bytes, with allowed
  // SI suffixes (M, K, G, Mi, Ki, Gi, etc).
  string disk = 4;
}

// EtcdJobInfo is the portion of the JobInfo that gets stored in etcd during
// job execution. It contains fields which change over the lifetime of the job
// but aren't used in the execution of the job.
message EtcdJobInfo {
  Job job = 1;
  Pipeline pipeline = 2;
  pfs_1_7.Commit output_commit = 3;
  // Job restart count (e.g. due to datum failure)
  uint64 restart = 4;

  // Counts of how many times we processed or skipped a datum
  int64 data_processed = 5;
  int64 data_skipped = 6;
  int64 data_total = 7;
  int64 data_failed = 8;

  // Download/process/upload time and download/upload bytes
  ProcessStats stats = 9;

  pfs_1_7.Commit stats_commit = 10;
  JobState state = 11;
  string reason = 12;
  google.protobuf.Timestamp started = 13;
  google.protobuf.Timestamp finished = 14;
}

// JobInfo is the full inspection record of a job. Field numbers are
// non-sequential because fields were appended over several releases; do not
// renumber.
message JobInfo {
  reserved 4, 5;
  Job job = 1;
  Transform transform = 2;
  Pipeline pipeline = 3;
  uint64 pipeline_version = 13;
  ParallelismSpec parallelism_spec = 12;
  Egress egress = 15;
  Job parent_job = 6;
  google.protobuf.Timestamp started = 7;
  google.protobuf.Timestamp finished = 8;
  pfs_1_7.Commit output_commit = 9;
  JobState state = 10;
  string reason = 35;  // reason explains why the job is in the current state
  Service service = 14;
  pfs_1_7.Repo output_repo = 18;
  string output_branch = 17;
  uint64 restart = 20;
  int64 data_processed = 22;
  int64 data_skipped = 30;
  int64 data_failed = 40;
  int64 data_total = 23;
  ProcessStats stats = 31;
  repeated WorkerStatus worker_status = 24;
  ResourceSpec resource_requests = 25;
  ResourceSpec resource_limits = 36;
  Input input = 26;
  pfs_1_7.BranchInfo new_branch = 27;
  bool incremental = 28;
  pfs_1_7.Commit stats_commit = 29;
  bool enable_stats = 32;
  string salt = 33;
  bool batch = 34;
  ChunkSpec chunk_spec = 37;
  google.protobuf.Duration datum_timeout = 38;
  google.protobuf.Duration job_timeout = 39;
  int64 datum_tries = 41;
  SchedulingSpec scheduling_spec = 42;
  string pod_spec = 43;
}

// WorkerState tracks the kubernetes pod backing a worker.
enum WorkerState {
  POD_RUNNING = 0;
  POD_SUCCESS = 1;
  POD_FAILED = 2;
}

// Worker pairs a worker pod's name with its state.
message Worker {
  string name = 1;
  WorkerState state = 2;
}

// JobInfos is a list of JobInfo (non-streaming ListJob response).
message JobInfos {
  repeated JobInfo job_info = 1;
}

// Pipeline identifies a pipeline by name.
message Pipeline {
  string name = 1;
}

// PipelineInput is a legacy (pre-Input) pipeline input spec.
message PipelineInput {
  string name = 5;
  pfs_1_7.Repo repo = 1;
  string branch = 2;
  string glob = 3;
  bool lazy = 4;
  pfs_1_7.Commit from = 6;
}

enum PipelineState {
  // When the pipeline is not ready to be triggered by commits.
  // This happens when either 1) a pipeline has been created but not
  // yet picked up by a PPS server, or 2) the pipeline does not have
  // any inputs and is meant to be triggered manually
  PIPELINE_STARTING = 0;
  // After this pipeline is picked up by a pachd node. This is the normal
  // state of a pipeline.
  PIPELINE_RUNNING = 1;
  // After some error caused runPipeline to exit, but before the
  // pipeline is re-run. This is when the exponential backoff is
  // in effect.
  PIPELINE_RESTARTING = 2;
  // We have retried too many times and we have given up on this pipeline.
  PIPELINE_FAILURE = 3;
  // The pipeline has been explicitly paused by the user.
  PIPELINE_PAUSED = 4;
  // The pipeline is fully functional, but there are no commits to process.
  PIPELINE_STANDBY = 5;
}

// EtcdPipelineInfo is proto that Pachd stores in etcd for each pipeline. It
// tracks the state of the pipeline, and points to its metadata in PFS (and,
// by pointing to a PFS commit, de facto tracks the pipeline's version)
message EtcdPipelineInfo {
  PipelineState state = 1;
  string reason = 4;
  pfs_1_7.Commit spec_commit = 2;
  map<int32, int32> job_counts = 3;
  string auth_token = 5;
}

// PipelineInfo is the full inspection record of a pipeline. Field numbers
// are non-sequential because fields were appended over several releases; do
// not renumber.
message PipelineInfo {
  reserved 3, 4, 26;
  string id = 17 [(gogoproto.customname) = "ID"];
  Pipeline pipeline = 1;
  uint64 version = 11;
  Transform transform = 2;
  ParallelismSpec parallelism_spec = 10;
  HashtreeSpec hashtree_spec = 42;
  Egress egress = 15;
  google.protobuf.Timestamp created_at = 6;

  // state indicates the current state of the pipeline. This is not stored in
  // PFS along with the rest of this data structure--PPS.InspectPipeline fills
  // it in
  PipelineState state = 7;
  // same for stopped field
  bool stopped = 38;
  string recent_error = 8;

  // job_counts indicates the number of jobs within this pipeline in a given
  // state. This is not stored in PFS along with the rest of this data
  // structure--PPS.InspectPipeline fills it in
  map<int32, int32> job_counts = 9;
  string output_branch = 16;
  google.protobuf.Duration scale_down_threshold = 18;
  ResourceSpec resource_requests = 19;
  ResourceSpec resource_limits = 31;
  Input input = 20;
  string description = 21;
  bool incremental = 22;
  string cache_size = 23;
  bool enable_stats = 24;
  string salt = 25;
  bool batch = 27;

  // reason includes any error messages associated with a failed pipeline
  string reason = 28;
  int64 max_queue_size = 29;
  Service service = 30;
  ChunkSpec chunk_spec = 32;
  google.protobuf.Duration datum_timeout = 33;
  google.protobuf.Duration job_timeout = 34;
  string githook_url = 35 [(gogoproto.customname) = "GithookURL"];
  pfs_1_7.Commit spec_commit = 36;
  bool standby = 37;
  int64 datum_tries = 39;
  SchedulingSpec scheduling_spec = 40;
  string pod_spec = 41;
}

// PipelineInfos is a list of PipelineInfo (ListPipeline response).
message PipelineInfos {
  repeated PipelineInfo pipeline_info = 1;
}

// CreateJobRequest creates a job directly (most fields of the historical
// request are reserved; jobs are normally created by pipelines).
message CreateJobRequest {
  reserved 3, 4, 1, 10, 7, 9, 8, 12, 11, 13, 14, 21, 15, 16, 17, 18, 19, 20, 22, 23, 24;
  Pipeline pipeline = 2;
  pfs_1_7.Commit output_commit = 25;
}

message InspectJobRequest {
  // Callers should set either Job or OutputCommit, not both.
  Job job = 1;
  pfs_1_7.Commit output_commit = 3;
  // block until state is either JOB_FAILURE or JOB_SUCCESS
  bool block_state = 2;
}

message ListJobRequest {
  Pipeline pipeline = 1;                    // nil means all pipelines
  repeated pfs_1_7.Commit input_commit = 2; // nil means all inputs
  pfs_1_7.Commit output_commit = 3;
}

// FlushJobRequest blocks until the jobs downstream of 'commits' finish.
message FlushJobRequest {
  repeated pfs_1_7.Commit commits = 1;
  repeated Pipeline to_pipelines = 2;
}

message DeleteJobRequest {
  Job job = 1;
}

message StopJobRequest {
  Job job = 1;
}

message GetLogsRequest {
  reserved 4;
  // The pipeline from which we want to get logs (required if the job in 'job'
  // was created as part of a pipeline. To get logs from a non-orphan job
  // without the pipeline that created it, you need to use ElasticSearch).
  Pipeline pipeline = 2;

  // The job from which we want to get logs.
  Job job = 1;

  // Names of input files from which we want processing logs. This may contain
  // multiple files, to query pipelines that contain multiple inputs. Each
  // filter may be an absolute path of a file within a pps repo, or it may be
  // a hash for that file (to search for files at specific versions)
  repeated string data_filters = 3;

  Datum datum = 6;

  // If true get logs from the master process
  bool master = 5;

  // Continue to follow new logs as they become available.
  bool follow = 7;

  // If nonzero, the number of lines from the end of the logs to return. Note:
  // tail applies per container, so you will get tail * <number of pods> total
  // lines back.
  int64 tail = 8;
}

// LogMessage is a log line from a PPS worker, annotated with metadata
// indicating when and why the line was logged.
message LogMessage {
  // The job and pipeline for which a PFS file is being processed (if the job
  // is an orphan job, pipeline name and ID will be unset)
  string pipeline_name = 1;
  string job_id = 3 [(gogoproto.customname) = "JobID"];
  string worker_id = 7 [(gogoproto.customname) = "WorkerID"];
  string datum_id = 9 [(gogoproto.customname) = "DatumID"];
  bool master = 10;

  // The PFS files being processed (one per pipeline/job input)
  repeated InputFile data = 4;

  // User is true if log message comes from the users code.
  bool user = 8;

  // The message logged, and the time at which it was logged
  google.protobuf.Timestamp ts = 5;
  string message = 6;
}

message RestartDatumRequest {
  Job job = 1;
  repeated string data_filters = 2;
}

message InspectDatumRequest {
  Datum datum = 1;
}

message ListDatumRequest {
  Job job = 1;
  int64 page_size = 2;
  int64 page = 3;
}

message ListDatumResponse {
  repeated DatumInfo datum_infos = 1;
  int64 total_pages = 2;
  int64 page = 3;
}

// ListDatumStreamResponse is identical to ListDatumResponse, except that only
// one DatumInfo is present (as these responses are streamed)
message ListDatumStreamResponse {
  DatumInfo datum_info = 1;
  // total_pages is only set in the first response (and set to 0 in all other
  // responses)
  int64 total_pages = 2;
  // page is only set in the first response (and set to 0 in all other
  // responses)
  int64 page = 3;
}

// ChunkSpec specifies how a pipeline should chunk its datums.
message ChunkSpec {
  // number, if nonzero, specifies that each chunk should contain `number`
  // datums. Chunks may contain fewer if the total number of datums don't
  // divide evenly.
  int64 number = 1;
  // size_bytes, if nonzero, specifies a target size for each chunk of datums.
  // Chunks may be larger or smaller than size_bytes, but will usually be
  // pretty close to size_bytes in size.
  int64 size_bytes = 2;
}

// SchedulingSpec passes kubernetes scheduling hints through to worker pods.
message SchedulingSpec {
  map<string, string> node_selector = 1;
  string priority_class_name = 2;
}

message CreatePipelineRequest {
  reserved 3, 4;
  Pipeline pipeline = 1;
  Transform transform = 2;
  ParallelismSpec parallelism_spec = 7;
  HashtreeSpec hashtree_spec = 31;
  Egress egress = 9;
  bool update = 5;
  string output_branch = 10;
  google.protobuf.Duration scale_down_threshold = 11;
  ResourceSpec resource_requests = 12;
  ResourceSpec resource_limits = 22;
  Input input = 13;
  string description = 14;
  bool incremental = 15;
  string cache_size = 16;
  bool enable_stats = 17;
  // Reprocess forces the pipeline to reprocess all datums.
  // It only has meaning if Update is true
  bool reprocess = 18;
  bool batch = 19;
  int64 max_queue_size = 20;
  Service service = 21;
  ChunkSpec chunk_spec = 23;
  google.protobuf.Duration datum_timeout = 24;
  google.protobuf.Duration job_timeout = 25;
  string salt = 26;
  bool standby = 27;
  int64 datum_tries = 28;
  SchedulingSpec scheduling_spec = 29;
  string pod_spec = 30;
}

message InspectPipelineRequest {
  Pipeline pipeline = 1;
}

message ListPipelineRequest {
}

message DeletePipelineRequest {
  reserved 2, 3;
  Pipeline pipeline = 1;
  bool all = 4;
  bool force = 5;
}

message StartPipelineRequest {
  Pipeline pipeline = 1;
}

message StopPipelineRequest {
  Pipeline pipeline = 1;
}

message RerunPipelineRequest {
  Pipeline pipeline = 1;
  repeated pfs_1_7.Commit exclude = 2;
  repeated pfs_1_7.Commit include = 3;
}

message GarbageCollectRequest {
  // Memory is how much memory to use in computing which objects are alive. A
  // larger number will result in more precise garbage collection (at the
  // cost of more memory usage).
  int64 memory_bytes = 1;
}
message GarbageCollectResponse {}

message ActivateAuthRequest {}
message ActivateAuthResponse {}

service API {
  rpc CreateJob(CreateJobRequest) returns (Job) {}
  rpc InspectJob(InspectJobRequest) returns (JobInfo) {}
  // ListJob returns information about current and past Pachyderm jobs. This is
  // deprecated in favor of ListJobStream
  rpc ListJob(ListJobRequest) returns (JobInfos) {}
  // ListJobStream returns information about current and past Pachyderm jobs.
  rpc ListJobStream(ListJobRequest) returns (stream JobInfo) {}
  rpc FlushJob(FlushJobRequest) returns (stream JobInfo) {}
  rpc DeleteJob(DeleteJobRequest) returns (google.protobuf.Empty) {}
  rpc StopJob(StopJobRequest) returns (google.protobuf.Empty) {}
  rpc InspectDatum(InspectDatumRequest) returns (DatumInfo) {}
  // ListDatum returns information about each datum fed to a Pachyderm job. This
  // is deprecated in favor of ListDatumStream
  rpc ListDatum(ListDatumRequest) returns (ListDatumResponse) {}
  // ListDatumStream returns information about each datum fed to a Pachyderm job
  rpc ListDatumStream(ListDatumRequest) returns (stream ListDatumStreamResponse) {}
  rpc RestartDatum(RestartDatumRequest) returns (google.protobuf.Empty) {}

  rpc CreatePipeline(CreatePipelineRequest) returns (google.protobuf.Empty) {}
  rpc InspectPipeline(InspectPipelineRequest) returns (PipelineInfo) {}
  rpc ListPipeline(ListPipelineRequest) returns (PipelineInfos) {}
  rpc DeletePipeline(DeletePipelineRequest) returns (google.protobuf.Empty) {}
  rpc StartPipeline(StartPipelineRequest) returns (google.protobuf.Empty) {}
  rpc StopPipeline(StopPipelineRequest) returns (google.protobuf.Empty) {}
  rpc RerunPipeline(RerunPipelineRequest) returns (google.protobuf.Empty) {}

  // DeleteAll deletes everything
  rpc DeleteAll(google.protobuf.Empty) returns (google.protobuf.Empty) {}
  rpc GetLogs(GetLogsRequest) returns (stream LogMessage) {}

  // Garbage collection
  rpc GarbageCollect(GarbageCollectRequest) returns (GarbageCollectResponse) {}

  // An internal call that causes PPS to put itself into an auth-enabled state
  // (all pipeline have tokens, correct permissions, etcd)
  rpc ActivateAuth(ActivateAuthRequest) returns (ActivateAuthResponse) {}
}