github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/pkg/model/v1beta1/job.go (about) 1 package v1beta1 2 3 import ( 4 "time" 5 6 "github.com/imdario/mergo" 7 "k8s.io/apimachinery/pkg/selection" 8 ) 9 10 // Job contains data about a job request in the bacalhau network. 11 type Job struct { 12 APIVersion string `json:"APIVersion" example:"V1beta1"` 13 14 Metadata Metadata `json:"Metadata,omitempty"` 15 16 // The specification of this job. 17 Spec Spec `json:"Spec,omitempty"` 18 19 // The status of the job: where are the nodes at, what are the events 20 Status JobStatus `json:"Status,omitempty"` 21 } 22 23 type Metadata struct { 24 // The unique global ID of this job in the bacalhau network. 25 ID string `json:"ID,omitempty" example:"92d5d4ee-3765-4f78-8353-623f5f26df08"` 26 27 // Time the job was submitted to the bacalhau network. 28 CreatedAt time.Time `json:"CreatedAt,omitempty" example:"2022-11-17T13:29:01.871140291Z"` 29 30 // The ID of the client that created this job. 31 ClientID string `json:"ClientID,omitempty" example:"ac13188e93c97a9c2e7cf8e86c7313156a73436036f30da1ececc2ce79f9ea51"` 32 } 33 type JobRequester struct { 34 // The ID of the requester node that owns this job. 35 RequesterNodeID string `json:"RequesterNodeID,omitempty" example:"QmXaXu9N5GNetatsvwnTfQqNtSeKAD6uCmarbh3LMRYAcF"` 36 37 // The public key of the Requester node that created this job 38 // This can be used to encrypt messages back to the creator 39 RequesterPublicKey PublicKey `json:"RequesterPublicKey,omitempty"` 40 } 41 type JobStatus struct { 42 // The current state of the job 43 State JobState `json:"JobState,omitempty"` 44 45 // All events associated with the job 46 Events []JobEvent `json:"JobEvents,omitempty"` 47 48 // All local events associated with the job 49 LocalEvents []JobLocalEvent `json:"LocalJobEvents,omitempty"` 50 51 Requester JobRequester `json:"Requester,omitempty"` 52 } 53 54 // TODO: There's probably a better way we want to globally version APIs 55 func NewJob() *Job { 56 return &Job{ 57 APIVersion: APIVersionLatest().String(), 58 } 59 } 60 61 func NewJobWithSaneProductionDefaults() (*Job, error) { 62 j := NewJob() 63 err := mergo.Merge(j, &Job{ 64 APIVersion: APIVersionLatest().String(), 65 Spec: Spec{ 66 Engine: EngineDocker, 67 Verifier: VerifierNoop, 68 Publisher: PublisherEstuary, 69 Deal: Deal{ 70 Concurrency: 1, 71 Confidence: 0, 72 MinBids: 0, // 0 means no minimum before bidding 73 }, 74 }, 75 }) 76 if err != nil { 77 return nil, err 78 } 79 return j, nil 80 } 81 82 // JobWithInfo is the job request + the result of attempting to run it on the network 83 type JobWithInfo struct { 84 Job Job `json:"Job,omitempty"` 85 JobState JobState `json:"JobState,omitempty"` 86 JobEvents []JobEvent `json:"JobEvents,omitempty"` 87 JobLocalEvents []JobLocalEvent `json:"JobLocalEvents,omitempty"` 88 } 89 90 // JobShard contains data about a job shard in the bacalhau network. 91 type JobShard struct { 92 Job *Job `json:"Job,omitempty"` 93 94 Index int `json:"Index,omitempty"` 95 } 96 97 func (shard JobShard) ID() string { 98 return GetShardID(shard.Job.Metadata.ID, shard.Index) 99 } 100 101 func (shard JobShard) String() string { 102 return shard.ID() 103 } 104 105 type JobExecutionPlan struct { 106 // how many shards are there in total for this job 107 // we are expecting this number x concurrency total 108 // JobShardState objects for this job 109 TotalShards int `json:"ShardsTotal,omitempty"` 110 } 111 112 // describe how we chunk a job up into shards 113 type JobShardingConfig struct { 114 // divide the inputs up into the smallest possible unit 115 // for example /* would mean "all top level files or folders" 116 // this being an empty string means "no sharding" 117 GlobPattern string `json:"GlobPattern,omitempty"` 118 // how many "items" are to be processed in each shard 119 // we first apply the glob pattern which will result in a flat list of items 120 // this number decides how to group that flat list into actual shards run by compute nodes 121 BatchSize int `json:"BatchSize,omitempty"` 122 // when using multiple input volumes 123 // what path do we treat as the common mount path to apply the glob pattern to 124 BasePath string `json:"GlobPatternBasePath,omitempty"` 125 } 126 127 // The state of a job across the whole network 128 // generally be in different states on different nodes - one node may be 129 // ignoring a job as its bid was rejected, while another node may be 130 // submitting results for the job to the requester node. 131 // 132 // Each node will produce an array of JobShardState one for each shard 133 // (jobs without a sharding config will still have sharded job 134 // states - just with a shard count of 1). Any code that is determining 135 // the current "state" of a job must look at both: 136 // 137 // - the ShardCount of the JobExecutionPlan 138 // - the collection of JobShardState to determine the current state 139 // 140 // Note: JobState itself is not mutable - the JobExecutionPlan and 141 // JobShardState are updatable and the JobState is queried by the rest 142 // of the system. 143 type JobState struct { 144 Nodes map[string]JobNodeState `json:"Nodes,omitempty"` 145 } 146 147 type JobNodeState struct { 148 Shards map[int]JobShardState `json:"Shards,omitempty"` 149 } 150 151 type JobShardState struct { 152 // which node is running this shard 153 NodeID string `json:"NodeId,omitempty"` 154 // Compute node reference for this shard execution 155 ExecutionID string `json:"ExecutionId,omitempty"` 156 // what shard is this we are running 157 ShardIndex int `json:"ShardIndex,omitempty"` 158 // what is the state of the shard on this node 159 State JobStateType `json:"State,omitempty"` 160 // an arbitrary status message 161 Status string `json:"Status,omitempty"` 162 // the proposed results for this shard 163 // this will be resolved by the verifier somehow 164 VerificationProposal []byte `json:"VerificationProposal,omitempty"` 165 VerificationResult VerificationResult `json:"VerificationResult,omitempty"` 166 PublishedResult StorageSpec `json:"PublishedResults,omitempty"` 167 168 // RunOutput of the job 169 RunOutput *RunCommandResult `json:"RunOutput,omitempty"` 170 } 171 172 // The deal the client has made with the bacalhau network. 173 // This is updateable by the client who submitted the job 174 type Deal struct { 175 // The maximum number of concurrent compute node bids that will be 176 // accepted by the requester node on behalf of the client. 177 Concurrency int `json:"Concurrency,omitempty"` 178 // The number of nodes that must agree on a verification result 179 // this is used by the different verifiers - for example the 180 // deterministic verifier requires the winning group size 181 // to be at least this size 182 Confidence int `json:"Confidence,omitempty"` 183 // The minimum number of bids that must be received before the Requester 184 // node will randomly accept concurrency-many of them. This allows the 185 // Requester node to get some level of guarantee that the execution of the 186 // jobs will be spread evenly across the network (assuming that this value 187 // is some large proportion of the size of the network). 188 MinBids int `json:"MinBids,omitempty"` 189 } 190 191 // LabelSelectorRequirement A selector that contains values, a key, and an operator that relates the key and values. 192 // These are based on labels library from kubernetes package. While we use labels.Requirement to represent the label selector requirements 193 // in the command line arguments as the library supports multiple parsing formats, and we also use it when matching selectors to labels 194 // as that's what the library expects, labels.Requirements are not serializable, so we need to convert them to LabelSelectorRequirements. 195 type LabelSelectorRequirement struct { 196 // key is the label key that the selector applies to. 197 Key string `json:"Key"` 198 // operator represents a key's relationship to a set of values. 199 // Valid operators are In, NotIn, Exists and DoesNotExist. 200 Operator selection.Operator `json:"Operator"` 201 // values is an array of string values. If the operator is In or NotIn, 202 // the values array must be non-empty. If the operator is Exists or DoesNotExist, 203 // the values array must be empty. This array is replaced during a strategic 204 Values []string `json:"Values,omitempty"` 205 } 206 207 // Spec is a complete specification of a job that can be run on some 208 // execution provider. 209 type Spec struct { 210 // e.g. docker or language 211 Engine Engine `json:"Engine,omitempty"` 212 213 Verifier Verifier `json:"Verifier,omitempty"` 214 215 // there can be multiple publishers for the job 216 Publisher Publisher `json:"Publisher,omitempty"` 217 218 // executor specific data 219 Docker JobSpecDocker `json:"Docker,omitempty"` 220 Language JobSpecLanguage `json:"Language,omitempty"` 221 Wasm JobSpecWasm `json:"Wasm,omitempty"` 222 223 // the compute (cpu, ram) resources this job requires 224 Resources ResourceUsageConfig `json:"Resources,omitempty"` 225 226 // The type of networking access that the job needs 227 Network NetworkConfig `json:"Network,omitempty"` 228 229 // How long a job can run in seconds before it is killed. 230 // This includes the time required to run, verify and publish results 231 Timeout float64 `json:"Timeout,omitempty"` 232 233 // the data volumes we will read in the job 234 // for example "read this ipfs cid" 235 // TODO: #667 Replace with "Inputs", "Outputs" (note the caps) for yaml/json when we update the n.js file 236 Inputs []StorageSpec `json:"inputs,omitempty"` 237 238 // Input volumes that will not be sharded 239 // for example to upload code into a base image 240 // every shard will get the full range of context volumes 241 Contexts []StorageSpec `json:"Contexts,omitempty"` 242 243 // the data volumes we will write in the job 244 // for example "write the results to ipfs" 245 Outputs []StorageSpec `json:"outputs,omitempty"` 246 247 // Annotations on the job - could be user or machine assigned 248 Annotations []string `json:"Annotations,omitempty"` 249 250 // NodeSelectors is a selector which must be true for the compute node to run this job. 251 NodeSelectors []LabelSelectorRequirement `json:"NodeSelectors,omitempty"` 252 // the sharding config for this job 253 // describes how the job might be split up into parallel shards 254 Sharding JobShardingConfig `json:"Sharding,omitempty"` 255 256 // Do not track specified by the client 257 DoNotTrack bool `json:"DoNotTrack,omitempty"` 258 259 // how will this job be executed by nodes on the network 260 ExecutionPlan JobExecutionPlan `json:"ExecutionPlan,omitempty"` 261 262 // The deal the client has made, such as which job bids they have accepted. 263 Deal Deal `json:"Deal,omitempty"` 264 } 265 266 // Return timeout duration 267 func (s *Spec) GetTimeout() time.Duration { 268 return time.Duration(s.Timeout * float64(time.Second)) 269 } 270 271 // Return pointers to all the storage specs in the spec. 272 func (s *Spec) AllStorageSpecs() []*StorageSpec { 273 storages := []*StorageSpec{ 274 &s.Language.Context, 275 &s.Wasm.EntryModule, 276 } 277 278 for _, collection := range [][]StorageSpec{ 279 s.Contexts, 280 s.Inputs, 281 s.Outputs, 282 } { 283 for index := range collection { 284 storages = append(storages, &collection[index]) 285 } 286 } 287 288 return storages 289 } 290 291 // for VM style executors 292 type JobSpecDocker struct { 293 // this should be pullable by docker 294 Image string `json:"Image,omitempty"` 295 // optionally override the default entrypoint 296 Entrypoint []string `json:"Entrypoint,omitempty"` 297 // a map of env to run the container with 298 EnvironmentVariables []string `json:"EnvironmentVariables,omitempty"` 299 // working directory inside the container 300 WorkingDirectory string `json:"WorkingDirectory,omitempty"` 301 } 302 303 // for language style executors (can target docker or wasm) 304 type JobSpecLanguage struct { 305 Language string `json:"Language,omitempty"` // e.g. python 306 LanguageVersion string `json:"LanguageVersion,omitempty"` // e.g. 3.8 307 // must this job be run in a deterministic context? 308 Deterministic bool `json:"DeterministicExecution,omitempty"` 309 // context is a tar file stored in ipfs, containing e.g. source code and requirements 310 Context StorageSpec `json:"JobContext,omitempty"` 311 // optional program specified on commandline, like python -c "print(1+1)" 312 Command string `json:"Command,omitempty"` 313 // optional program path relative to the context dir. one of Command or ProgramPath must be specified 314 ProgramPath string `json:"ProgramPath,omitempty"` 315 // optional requirements.txt (or equivalent) path relative to the context dir 316 RequirementsPath string `json:"RequirementsPath,omitempty"` 317 } 318 319 // Describes a raw WASM job 320 type JobSpecWasm struct { 321 // The module that contains the WASM code to start running. 322 EntryModule StorageSpec `json:"EntryModule,omitempty"` 323 324 // The name of the function in the EntryModule to call to run the job. For 325 // WASI jobs, this will always be `_start`, but jobs can choose to call 326 // other WASM functions instead. The EntryPoint must be a zero-parameter 327 // zero-result function. 328 EntryPoint string `json:"EntryPoint,omitempty"` 329 330 // The arguments supplied to the program (i.e. as ARGV). 331 Parameters []string `json:"Parameters,omitempty"` 332 333 // The variables available in the environment of the running program. 334 EnvironmentVariables map[string]string `json:"EnvironmentVariables,omitempty"` 335 336 // TODO #880: Other WASM modules whose exports will be available as imports 337 // to the EntryModule. 338 ImportModules []StorageSpec `json:"ImportModules,omitempty"` 339 } 340 341 // gives us a way to keep local data against a job 342 // so our compute node and requester node control loops 343 // can keep state against a job without broadcasting it 344 // to the rest of the network 345 type JobLocalEvent struct { 346 EventName JobLocalEventType `json:"EventName,omitempty"` 347 JobID string `json:"JobID,omitempty"` 348 ShardIndex int `json:"ShardIndex,omitempty"` 349 TargetNodeID string `json:"TargetNodeID,omitempty"` 350 } 351 352 // we emit these to other nodes so they update their 353 // state locally and can emit events locally 354 type JobEvent struct { 355 // APIVersion of the Job 356 APIVersion string `json:"APIVersion,omitempty" example:"V1beta1"` 357 358 JobID string `json:"JobID,omitempty" example:"9304c616-291f-41ad-b862-54e133c0149e"` 359 // what shard is this event for 360 ShardIndex int `json:"ShardIndex,omitempty"` 361 // compute execution identifier 362 ExecutionID string `json:"ExecutionID,omitempty" example:"9304c616-291f-41ad-b862-54e133c0149e"` 363 // optional clientID if this is an externally triggered event (like create job) 364 ClientID string `json:"ClientID,omitempty" example:"ac13188e93c97a9c2e7cf8e86c7313156a73436036f30da1ececc2ce79f9ea51"` 365 // the node that emitted this event 366 SourceNodeID string `json:"SourceNodeID,omitempty" example:"QmXaXu9N5GNetatsvwnTfQqNtSeKAD6uCmarbh3LMRYAcF"` 367 // the node that this event is for 368 // e.g. "AcceptJobBid" was emitted by Requester but it targeting compute node 369 TargetNodeID string `json:"TargetNodeID,omitempty" example:"QmdZQ7ZbhnvWY1J12XYKGHApJ6aufKyLNSvf8jZBrBaAVL"` 370 EventName JobEventType `json:"EventName,omitempty"` 371 // this is only defined in "create" events 372 Spec Spec `json:"Spec,omitempty"` 373 // this is only defined in "create" events 374 JobExecutionPlan JobExecutionPlan `json:"JobExecutionPlan,omitempty"` 375 // this is only defined in "update_deal" events 376 Deal Deal `json:"Deal,omitempty"` 377 Status string `json:"Status,omitempty" example:"Got results proposal of length: 0"` 378 VerificationProposal []byte `json:"VerificationProposal,omitempty"` 379 VerificationResult VerificationResult `json:"VerificationResult,omitempty"` 380 PublishedResult StorageSpec `json:"PublishedResult,omitempty"` 381 382 EventTime time.Time `json:"EventTime,omitempty" example:"2022-11-17T13:32:55.756658941Z"` 383 SenderPublicKey PublicKey `json:"SenderPublicKey,omitempty"` 384 385 // RunOutput of the job 386 RunOutput *RunCommandResult `json:"RunOutput,omitempty"` 387 } 388 389 // we need to use a struct for the result because: 390 // a) otherwise we don't know if VerificationResult==false 391 // means "I've not verified yet" or "verification failed" 392 // b) we might want to add further fields to the result later 393 type VerificationResult struct { 394 Complete bool `json:"Complete,omitempty"` 395 Result bool `json:"Result,omitempty"` 396 } 397 398 type JobCreatePayload struct { 399 // the id of the client that is submitting the job 400 ClientID string `json:"ClientID,omitempty" validate:"required"` 401 402 APIVersion string `json:"APIVersion,omitempty" example:"V1beta1" validate:"required"` 403 404 // The specification of this job. 405 Spec *Spec `json:"Spec,omitempty" validate:"required"` 406 }