// Source: github.com/pachyderm/pachyderm@v1.13.4/src/client/admin/v1_9/pfs/pfs.proto
syntax = "proto3";

package pfs_1_9;
option go_package = "github.com/pachyderm/pachyderm/src/client/admin/v1_9/pfs";

import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";

import "gogoproto/gogo.proto";

import "client/admin/v1_9/auth/auth.proto";

//// PFS Data structures (stored in etcd)

// Repo identifies a repo by name.
message Repo {
  string name = 1;
}

// Branch identifies a branch by its repo and branch name.
message Branch {
  Repo repo = 1;
  string name = 2;
}

// BranchInfo describes a branch: its head commit and the branches it is
// related to through provenance and subvenance.
message BranchInfo {
  Branch branch = 4;
  Commit head = 2;
  repeated Branch provenance = 3;
  repeated Branch subvenance = 5;
  repeated Branch direct_provenance = 6;

  // Deprecated field left for backward compatibility.
  string name = 1;
}

// BranchInfos is a list of BranchInfo messages.
message BranchInfos {
  repeated BranchInfo branch_info = 1;
}

// File identifies a file by the commit it belongs to and its path.
message File {
  Commit commit = 1;
  string path = 2;
}

// Block identifies a block by hash.
message Block {
  string hash = 1;
}

// Object identifies an object by hash.
message Object {
  string hash = 1;
}

// Tag identifies a tag by name.
message Tag {
  string name = 1;
}

// RepoInfo is the main data structure representing a Repo in etcd
message RepoInfo {
  reserved 4;
  Repo repo = 1;
  google.protobuf.Timestamp created = 2;
  uint64 size_bytes = 3;
  string description = 5;
  repeated Branch branches = 7;

  // Set by ListRepo and InspectRepo if Pachyderm's auth system is active, but
  // not stored in etcd. To set a user's auth scope for a repo, use the
  // Pachyderm Auth API (in src/client/auth/auth.proto)
  RepoAuthInfo auth_info = 6;
}

// RepoAuthInfo includes the caller's access scope for a repo, and is returned
// by ListRepo and InspectRepo but not persisted in etcd. It's used by the
// Pachyderm dashboard to render repo access appropriately. To set a user's auth
// scope for a repo, use the Pachyderm Auth API (in src/client/auth/auth.proto)
message RepoAuthInfo {
  // The caller's access level to the relevant repo (e.g. may be OWNER even if
  // the user isn't an OWNER of the repo, if they're an admin for the cluster)
  auth_1_9.Scope access_level = 1;
}

// These are the different places where a commit may originate from
enum OriginKind {
  USER = 0;
  AUTO = 1;
  FSCK = 2;
}

// CommitOrigin records the kind of origin of a commit.
message CommitOrigin {
  OriginKind kind = 1;
}

// Commit is a reference to a commit (e.g. the collection of branches and the
// collection of currently-open commits in etcd are collections of Commit
// protos)
message Commit {
  Repo repo = 1;
  string id = 2 [(gogoproto.customname) = "ID"];
}

// CommitRange represents chain of commits with Lower being an ancestor of
// Upper or, in the case of a range of size 1, the same commit.
message CommitRange {
  Commit lower = 1;
  Commit upper = 2;
}

// CommitProvenance keeps track of where (i.e. which branch) a certain commit
// originated from. A commit's provenance consists of the commits which are in
// its causal history.
message CommitProvenance {
  Commit commit = 1;
  Branch branch = 2;
}

// CommitInfo is the main data structure representing a commit in etcd
message CommitInfo {
  reserved 6, 10;
  Commit commit = 1;
  Branch branch = 15;
  CommitOrigin origin = 17;
  // description is a user-provided string describing this commit
  string description = 8;
  Commit parent_commit = 2;
  repeated Commit child_commits = 11;
  google.protobuf.Timestamp started = 3;
  google.protobuf.Timestamp finished = 4;
  uint64 size_bytes = 5;

  // the commits and their original branches on which this commit is provenant
  repeated CommitProvenance provenance = 16;

  // ReadyProvenance is the number of provenant commits which have been
  // finished, if ReadyProvenance == len(Provenance) then the commit is ready
  // to be processed by pps.
  int64 ready_provenance = 12;

  repeated CommitRange subvenance = 9;
  // this is the block that stores the serialized form of a tree that
  // represents the entire file system hierarchy of the repo at this commit
  // If this is nil, then the commit is either open (in which case 'finished'
  // will also be nil) or is the output commit of a failed job (in which case
  // 'finished' will have a value -- the end time of the job)
  Object tree = 7;
  repeated Object trees = 13;
  Object datums = 14;

  int64 subvenant_commits_success = 18;
  int64 subvenant_commits_failure = 19;
  int64 subvenant_commits_total = 20;
}

// FileType distinguishes regular files from directories; RESERVED is the
// zero value.
enum FileType {
  RESERVED = 0;
  FILE = 1;
  DIR = 2;
}

// FileInfo describes a file or directory (see file_type).
message FileInfo {
  File file = 1;
  FileType file_type = 2;
  uint64 size_bytes = 3;
  google.protobuf.Timestamp committed = 10;
  // the base names (i.e. just the filenames, not the full paths) of
  // the children
  repeated string children = 6;
  repeated Object objects = 8;
  repeated BlockRef blockRefs = 9;
  bytes hash = 7;
}

// ByteRange is a pair of lower and upper byte bounds.
message ByteRange {
  uint64 lower = 1;
  uint64 upper = 2;
}

// BlockRef refers to a byte range within a block.
message BlockRef {
  Block block = 1;
  ByteRange range = 2;
}

// ObjectInfo pairs an Object with a BlockRef.
message ObjectInfo {
  Object object = 1;
  BlockRef block_ref = 2;
}

message Merge {
  repeated string prefixes = 1;
}

message Shard {
  PathRange range = 1;
}

// PathRange is a pair of lower and upper path bounds.
message PathRange {
  string lower = 1;
  string upper = 2;
}

// PFS API

// Request message for the CreateRepo RPC.
message CreateRepoRequest {
  reserved 2;
  Repo repo = 1;
  string description = 3;
  bool update = 4;
}

// Request message for the InspectRepo RPC.
message InspectRepoRequest {
  Repo repo = 1;
}

// Request message for the ListRepo RPC.
message ListRepoRequest {
  reserved 1;
}

// Response message for the ListRepo RPC.
message ListRepoResponse {
  repeated RepoInfo repo_info = 1;
}

// Request message for the DeleteRepo RPC.
message DeleteRepoRequest {
  Repo repo = 1;
  bool force = 2;
  bool all = 3;
}

// CommitState describes the states a commit can be in.
// The states are increasingly specific, i.e. a commit that is FINISHED also
// counts as STARTED.
enum CommitState {
  // The commit has been started, all commits satisfy this state.
  STARTED = 0;
  // The commit has been started, and all of its provenant commits have been
  // finished.
  READY = 1;
  // The commit has been finished.
  FINISHED = 2;
}

// Request message for the StartCommit RPC.
message StartCommitRequest {
  reserved 2;
  // Parent.ID may be empty in which case the commit that Branch points to will
  // be used as the parent.
  // If branch is empty, or if branch does not exist, the commit will have no
  // parent.
  Commit parent = 1;
  // description is a user-provided string describing this commit
  string description = 4;
  string branch = 3;
  repeated CommitProvenance provenance = 5;
}

// Request message for the BuildCommit RPC.
message BuildCommitRequest {
  reserved 2;
  Commit parent = 1;
  string branch = 4;
  repeated CommitProvenance provenance = 6;
  Object tree = 3;
  repeated Object trees = 7;
  Object datums = 8;
  // ID sets the ID of the created commit.
  string ID = 5;
  uint64 size_bytes = 9;
}

// Request message for the FinishCommit RPC.
message FinishCommitRequest {
  Commit commit = 1;
  // description is a user-provided string describing this commit. Setting this
  // will overwrite the description set in StartCommit
  string description = 2;

  Object tree = 3;
  repeated Object trees = 5;
  Object datums = 7;
  uint64 size_bytes = 6;
  // If set, 'commit' will be closed (its 'finished' field will be set to the
  // current time) but its 'tree' will be left nil.
  bool empty = 4;
}

// Request message for the InspectCommit RPC.
message InspectCommitRequest {
  Commit commit = 1;
  // BlockState causes inspect commit to block until the commit is in the
  // desired state.
  CommitState block_state = 2;
}

// Request message for the ListCommit and ListCommitStream RPCs.
message ListCommitRequest {
  Repo repo = 1;
  Commit from = 2;
  Commit to = 3;
  uint64 number = 4;
  // Return commits oldest to newest
  bool reverse = 5;
}

// CommitInfos is a list of CommitInfo messages.
message CommitInfos {
  repeated CommitInfo commit_info = 1;
}

// Request message for the CreateBranch RPC.
message CreateBranchRequest {
  Commit head = 1;
  // s_branch matches the field number and type of SetBranchRequest.Branch in
  // Pachyderm 1.6--so that operations (generated by pachyderm 1.6's
  // Admin.Export) can be deserialized by pachyderm 1.7 correctly
  string s_branch = 2;
  Branch branch = 3;
  repeated Branch provenance = 4;
}

// Request message for the InspectBranch RPC.
message InspectBranchRequest {
  Branch branch = 1;
}

// Request message for the ListBranch RPC.
message ListBranchRequest {
  Repo repo = 1;
  // Returns branches oldest to newest
  bool reverse = 2;
}

// Request message for the DeleteBranch RPC.
message DeleteBranchRequest {
  Branch branch = 1;
  bool force = 2;
}

// Request message for the DeleteCommit RPC.
message DeleteCommitRequest {
  Commit commit = 1;
}

// Request message for the FlushCommit RPC.
message FlushCommitRequest {
  repeated Commit commits = 1;
  repeated Repo to_repos = 2;
}

// Request message for the SubscribeCommit RPC.
message SubscribeCommitRequest {
  Repo repo = 1;
  string branch = 2;
  CommitProvenance prov = 5;
  // only commits created since this commit are returned
  Commit from = 3;
  // Don't return commits until they're in (at least) the desired state.
  CommitState state = 4;
}

// Request message for the GetFile RPC.
message GetFileRequest {
  File file = 1;
  int64 offset_bytes = 2;
  int64 size_bytes = 3;
}

// Delimiter selects how PutFile data is broken up into separate files (see
// PutFileRequest.delimiter).
enum Delimiter {
  NONE = 0;
  JSON = 1;
  LINE = 2;
  SQL = 3;
  CSV = 4;
}

// An OverwriteIndex specifies the index of objects from which new writes
// are applied to. Existing objects starting from the index are deleted.
// We want a separate message for OverwriteIndex because we want to be able to
// distinguish between a zero index and a non-existent index.
message OverwriteIndex {
  int64 index = 1;
}

// Request message for the (client-streaming) PutFile RPC.
message PutFileRequest {
  reserved 2, 4;
  File file = 1;
  bytes value = 3;
  string url = 5;
  // applies only to URLs that can be recursively walked, for example s3:// URLs
  bool recursive = 6;
  // Delimiter causes data to be broken up into separate files with File.Path
  // as a prefix.
  Delimiter delimiter = 7;
  // TargetFileDatums specifies the target number of datums in each written
  // file; it may be lower if data does not split evenly, but will never be
  // higher, unless the value is 0.
  int64 target_file_datums = 8;
  // TargetFileBytes specifies the target number of bytes in each written
  // file, files may have more or fewer bytes than the target.
  int64 target_file_bytes = 9;
  // header_records is an option for splitting data when 'delimiter' is not NONE
  // (or SQL). It specifies the number of records that are converted to a
  // header and applied to all file shards.
  //
  // This is particularly useful for CSV files, where the first row often
  // contains column titles; if 'header_records' is set to one in that case,
  // the first row will be associated with the directory that contains the rest
  // of the split-up csv rows as files, and if any data is retrieved from that
  // directory by GetFile, it will appear to begin with that first row of
  // column labels (including in pipeline workers).
  //
  // Note that SQL files have their own logic for determining headers (their
  // header is not a number of records, but a collection of SQL commands that
  // create the relevant tables and such). This way, SQL files retrieved by
  // GetFile can be passed to psql, and they will set up the appropriate tables
  // before inserting the records in the files that were retrieved.
  int64 header_records = 11;
  // overwrite_index is the object index where the write starts from. All
  // existing objects starting from the index are deleted.
  OverwriteIndex overwrite_index = 10;
}

// PutFileRecord is used to record PutFile requests in etcd temporarily.
message PutFileRecord {
  int64 size_bytes = 1;
  string object_hash = 2;
  OverwriteIndex overwrite_index = 3;
}

// PutFileRecords is a group of PutFileRecord messages, with an optional
// header and footer record.
message PutFileRecords {
  bool split = 1;
  repeated PutFileRecord records = 2;
  bool tombstone = 3;
  PutFileRecord header = 4;
  PutFileRecord footer = 5;
}

// Request message for the CopyFile RPC.
message CopyFileRequest {
  File src = 1;
  File dst = 2;
  bool overwrite = 3;
}

// Request message for the InspectFile RPC.
message InspectFileRequest {
  File file = 1;
}

// Request message for the ListFile and ListFileStream RPCs.
message ListFileRequest {
  // File is the parent directory of the files we want to list. This sets the
  // repo, the commit/branch, and path prefix of files we're interested in
  // If the "path" field is omitted, a list of files at the top level of the repo
  // is returned
  File file = 1;

  // Full indicates whether the result should include file contents, which may
  // be large (i.e. the list of children for directories, and the list of object
  // references for regular files)
  bool full = 2;

  // History indicates how many historical versions you want returned. Its
  // semantics are:
  // 0: Return the files as they are at the commit in `file`. FileInfo.File
  //    will equal File in this request.
  // 1: Return the files as they are in the last commit they were modified in.
  //    (This will have the same hash as if you'd passed 0, but
  //    FileInfo.File.Commit will be different.)
  // 2: Return the above and the files as they are in the next-last commit they
  //    were modified in.
  // 3: etc.
  //-1: Return all historical versions.
  int64 history = 3;
}

// Request message for the WalkFile RPC.
message WalkFileRequest {
  File file = 1;
}

// Request message for the GlobFile and GlobFileStream RPCs.
message GlobFileRequest {
  Commit commit = 1;
  string pattern = 2;
}

// FileInfos is the result of both ListFile and GlobFile
message FileInfos {
  repeated FileInfo file_info = 1;
}

// Request message for the DiffFile RPC.
message DiffFileRequest {
  File new_file = 1;
  // OldFile may be left nil in which case the same path in the parent of
  // NewFile's commit will be used.
  File old_file = 2;
  bool shallow = 3;
}

// Response message for the DiffFile RPC.
message DiffFileResponse {
  repeated FileInfo new_files = 1;
  repeated FileInfo old_files = 2;
}

// Request message for the DeleteFile RPC.
message DeleteFileRequest {
  File file = 1;
}

// Request message for the Fsck RPC.
message FsckRequest {
  bool fix = 1;
}

// Response message streamed by the Fsck RPC.
message FsckResponse {
  string fix = 1;
  string error = 2;
}

// API groups the repo, commit, branch and file RPCs of PFS.
service API {
  // Repo rpcs
  // CreateRepo creates a new repo.
  // An error is returned if the repo already exists.
  rpc CreateRepo(CreateRepoRequest) returns (google.protobuf.Empty) {}
  // InspectRepo returns info about a repo.
  rpc InspectRepo(InspectRepoRequest) returns (RepoInfo) {}
  // ListRepo returns info about all repos.
  rpc ListRepo(ListRepoRequest) returns (ListRepoResponse) {}
  // DeleteRepo deletes a repo.
  rpc DeleteRepo(DeleteRepoRequest) returns (google.protobuf.Empty) {}

  // Commit rpcs
  // StartCommit creates a new write commit from a parent commit.
  rpc StartCommit(StartCommitRequest) returns (Commit) {}
  // FinishCommit turns a write commit into a read commit.
  rpc FinishCommit(FinishCommitRequest) returns (google.protobuf.Empty) {}
  // InspectCommit returns the info about a commit.
  rpc InspectCommit(InspectCommitRequest) returns (CommitInfo) {}
  // ListCommit returns info about all commits. This is deprecated in favor of
  // ListCommitStream.
  rpc ListCommit(ListCommitRequest) returns (CommitInfos) {}
  // ListCommitStream is like ListCommit, but returns its results in a GRPC stream
  rpc ListCommitStream(ListCommitRequest) returns (stream CommitInfo) {}
  // DeleteCommit deletes a commit.
  rpc DeleteCommit(DeleteCommitRequest) returns (google.protobuf.Empty) {}
  // FlushCommit waits for downstream commits to finish
  rpc FlushCommit(FlushCommitRequest) returns (stream CommitInfo) {}
  // SubscribeCommit subscribes for new commits on a given branch
  rpc SubscribeCommit(SubscribeCommitRequest) returns (stream CommitInfo) {}
  // BuildCommit builds a commit that's backed by the given tree
  rpc BuildCommit(BuildCommitRequest) returns (Commit) {}

  // CreateBranch creates a new branch
  rpc CreateBranch(CreateBranchRequest) returns (google.protobuf.Empty) {}
  // InspectBranch returns info about a branch.
  rpc InspectBranch(InspectBranchRequest) returns (BranchInfo) {}
  // ListBranch returns info about the heads of branches.
  rpc ListBranch(ListBranchRequest) returns (BranchInfos) {}
  // DeleteBranch deletes a branch; note that the commits still exist.
  rpc DeleteBranch(DeleteBranchRequest) returns (google.protobuf.Empty) {}

  // File rpcs
  // PutFile writes the specified file to pfs.
  rpc PutFile(stream PutFileRequest) returns (google.protobuf.Empty) {}
  // CopyFile copies the contents of one file to another.
  rpc CopyFile(CopyFileRequest) returns (google.protobuf.Empty) {}
  // GetFile returns a byte stream of the contents of the file.
  rpc GetFile(GetFileRequest) returns (stream google.protobuf.BytesValue) {}
  // InspectFile returns info about a file.
  rpc InspectFile(InspectFileRequest) returns (FileInfo) {}
  // ListFile returns info about all files. This is deprecated in favor of
  // ListFileStream
  rpc ListFile(ListFileRequest) returns (FileInfos) {}
  // ListFileStream is a streaming version of ListFile
  // TODO(msteffen): When the dash has been updated to use ListFileStream,
  // replace ListFile with this RPC (https://github.com/pachyderm/dash/issues/201)
  rpc ListFileStream(ListFileRequest) returns (stream FileInfo) {}
  // WalkFile walks over all the files under a directory, including children of
  // children.
  rpc WalkFile(WalkFileRequest) returns (stream FileInfo) {}
  // GlobFile returns info about all files. This is deprecated in favor of
  // GlobFileStream
  rpc GlobFile(GlobFileRequest) returns (FileInfos) {}
  // GlobFileStream is a streaming version of GlobFile
  // TODO(msteffen): When the dash has been updated to use GlobFileStream,
  // replace GlobFile with this RPC (https://github.com/pachyderm/dash/issues/201)
  rpc GlobFileStream(GlobFileRequest) returns (stream FileInfo) {}
  // DiffFile returns the differences between 2 paths at 2 commits.
  rpc DiffFile(DiffFileRequest) returns (DiffFileResponse) {}
  // DeleteFile deletes a file.
  rpc DeleteFile(DeleteFileRequest) returns (google.protobuf.Empty) {}

  // DeleteAll deletes everything
  rpc DeleteAll(google.protobuf.Empty) returns (google.protobuf.Empty) {}
  // Fsck does a file system consistency check for pfs
  rpc Fsck(FsckRequest) returns (stream FsckResponse) {}
}

// Request message streamed to the PutObject, PutObjectSplit and PutObjects
// RPCs.
message PutObjectRequest {
  bytes value = 1;
  repeated Tag tags = 2;
  Block block = 3;
}

// Request message for the CreateObject RPC.
message CreateObjectRequest {
  Object object = 1;
  BlockRef block_ref = 2;
}

// Request message for the GetObjects RPC.
message GetObjectsRequest {
  repeated Object objects = 1;
  uint64 offset_bytes = 2;
  // The number of bytes requested.
  uint64 size_bytes = 3;
  // The total amount of bytes in these objects. It's OK if it's not
  // entirely accurate or if it's unknown (in which case it'd be set to 0).
  // It's used primarily as a hint for cache eviction.
  uint64 total_size = 4;
}

// Request message streamed to the PutBlock RPC.
message PutBlockRequest {
  Block block = 1;
  bytes value = 2;
}

// Request message for the GetBlock RPC.
message GetBlockRequest {
  Block block = 1;
}

// Request message for the GetBlocks RPC.
message GetBlocksRequest {
  repeated BlockRef blockRefs = 1;
  uint64 offset_bytes = 2;
  // The number of bytes requested.
  uint64 size_bytes = 3;
  // The total amount of bytes in these blocks. It's OK if it's not
  // entirely accurate or if it's unknown (in which case it'd be set to 0).
  // It's used primarily as a hint for cache eviction.
  uint64 total_size = 4;
}

// Request message for the ListBlock RPC.
message ListBlockRequest {}

// Request message for the TagObject RPC.
message TagObjectRequest {
  Object object = 1;
  repeated Tag tags = 2;
}

// Request message for the ListObjects RPC.
message ListObjectsRequest {}

// Request message for the ListTags RPC.
message ListTagsRequest {
  string prefix = 1;
  bool include_object = 2;
}

// Response message streamed by the ListTags RPC.
message ListTagsResponse {
  Tag tag = 1;
  Object object = 2;
}

// Request message for the DeleteObjects RPC.
message DeleteObjectsRequest {
  repeated Object objects = 1;
}

// Response message for the DeleteObjects RPC.
message DeleteObjectsResponse {}

// Request message for the DeleteTags RPC.
message DeleteTagsRequest {
  repeated Tag tags = 1;
}

// Response message for the DeleteTags RPC.
message DeleteTagsResponse {}

// Request message for the CheckObject RPC.
message CheckObjectRequest {
  Object object = 1;
}

// Response message for the CheckObject RPC.
message CheckObjectResponse {
  bool exists = 1;
}

// Objects is a list of Object messages.
message Objects {
  repeated Object objects = 1;
}

// ObjectAPI groups the object, block and tag RPCs.
service ObjectAPI {
  rpc PutObject(stream PutObjectRequest) returns (Object) {}
  rpc PutObjectSplit(stream PutObjectRequest) returns (Objects) {}
  rpc PutObjects(stream PutObjectRequest) returns (google.protobuf.Empty) {}
  rpc CreateObject(CreateObjectRequest) returns (google.protobuf.Empty) {}
  rpc GetObject(Object) returns (stream google.protobuf.BytesValue) {}
  rpc GetObjects(GetObjectsRequest) returns (stream google.protobuf.BytesValue) {}
  rpc PutBlock(stream PutBlockRequest) returns (google.protobuf.Empty) {}
  rpc GetBlock(GetBlockRequest) returns (stream google.protobuf.BytesValue) {}
  rpc GetBlocks(GetBlocksRequest) returns (stream google.protobuf.BytesValue) {}
  rpc ListBlock(ListBlockRequest) returns (stream Block) {}
  rpc TagObject(TagObjectRequest) returns (google.protobuf.Empty) {}
  rpc InspectObject(Object) returns (ObjectInfo) {}
  // CheckObject checks if an object exists in the blob store without
  // actually reading the object.
  rpc CheckObject(CheckObjectRequest) returns (CheckObjectResponse) {}
  rpc ListObjects(ListObjectsRequest) returns (stream ObjectInfo) {}
  rpc DeleteObjects(DeleteObjectsRequest) returns (DeleteObjectsResponse) {}
  rpc GetTag(Tag) returns (stream google.protobuf.BytesValue) {}
  rpc InspectTag(Tag) returns (ObjectInfo) {}
  rpc ListTags(ListTagsRequest) returns (stream ListTagsResponse) {}
  rpc DeleteTags(DeleteTagsRequest) returns (DeleteTagsResponse) {}
  rpc Compact(google.protobuf.Empty) returns (google.protobuf.Empty) {}
}

// ObjectIndex maps string keys to block refs (objects) and to objects (tags).
message ObjectIndex {
  map<string, BlockRef> objects = 1;
  map<string, Object> tags = 2;
}