github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/pkg/model/v1alpha1/job.go (about) 1 package v1alpha1 2 3 import ( 4 "time" 5 6 "github.com/imdario/mergo" 7 ) 8 9 // Job contains data about a job request in the bacalhau network. 10 type Job struct { 11 APIVersion string `json:"APIVersion" example:"V1beta1"` 12 13 // The unique global ID of this job in the bacalhau network. 14 ID string `json:"ID,omitempty" example:"92d5d4ee-3765-4f78-8353-623f5f26df08"` 15 16 // The ID of the requester node that owns this job. 17 RequesterNodeID string `json:"RequesterNodeID,omitempty" example:"QmXaXu9N5GNetatsvwnTfQqNtSeKAD6uCmarbh3LMRYAcF"` 18 19 // The public key of the Requester node that created this job 20 // This can be used to encrypt messages back to the creator 21 RequesterPublicKey PublicKey `json:"RequesterPublicKey,omitempty"` 22 23 // The ID of the client that created this job. 24 ClientID string `json:"ClientID,omitempty" example:"ac13188e93c97a9c2e7cf8e86c7313156a73436036f30da1ececc2ce79f9ea51"` 25 26 // The specification of this job. 27 Spec Spec `json:"Spec,omitempty"` 28 29 // The deal the client has made, such as which job bids they have accepted. 30 Deal Deal `json:"Deal,omitempty"` 31 32 // how will this job be executed by nodes on the network 33 ExecutionPlan JobExecutionPlan `json:"ExecutionPlan,omitempty"` 34 35 // Time the job was submitted to the bacalhau network. 36 CreatedAt time.Time `json:"CreatedAt,omitempty" example:"2022-11-17T13:29:01.871140291Z"` 37 38 // The current state of the job 39 State JobState `json:"JobState,omitempty"` 40 41 // All events associated with the job 42 Events []JobEvent `json:"JobEvents,omitempty"` 43 44 // All local events associated with the job 45 LocalEvents []JobLocalEvent `json:"LocalJobEvents,omitempty"` 46 } 47 48 func (job Job) String() string { 49 return job.ID 50 } 51 52 // TODO: There's probably a better way we want to globally version APIs 53 func NewJob() *Job { 54 return &Job{ 55 APIVersion: APIVersionLatest().String(), 56 } 57 } 58 59 func NewJobWithSaneProductionDefaults() (*Job, error) { 60 j := NewJob() 61 err := mergo.Merge(j, &Job{ 62 APIVersion: APIVersionLatest().String(), 63 Spec: Spec{ 64 Engine: EngineDocker, 65 Verifier: VerifierNoop, 66 Publisher: PublisherEstuary, 67 }, 68 Deal: Deal{ 69 Concurrency: 1, 70 Confidence: 0, 71 MinBids: 0, // 0 means no minimum before bidding 72 }, 73 }) 74 if err != nil { 75 return nil, err 76 } 77 return j, nil 78 } 79 80 // JobWithInfo is the job request + the result of attempting to run it on the network 81 type JobWithInfo struct { 82 Job Job `json:"Job,omitempty"` 83 JobState JobState `json:"JobState,omitempty"` 84 JobEvents []JobEvent `json:"JobEvents,omitempty"` 85 JobLocalEvents []JobLocalEvent `json:"JobLocalEvents,omitempty"` 86 } 87 88 // JobShard contains data about a job shard in the bacalhau network. 89 type JobShard struct { 90 Job *Job `json:"Job,omitempty"` 91 92 Index int `json:"Index,omitempty"` 93 } 94 95 func (shard JobShard) ID() string { 96 return GetShardID(shard.Job.ID, shard.Index) 97 } 98 99 func (shard JobShard) String() string { 100 return shard.ID() 101 } 102 103 type JobExecutionPlan struct { 104 // how many shards are there in total for this job 105 // we are expecting this number x concurrency total 106 // JobShardState objects for this job 107 TotalShards int `json:"ShardsTotal,omitempty"` 108 } 109 110 // describe how we chunk a job up into shards 111 type JobShardingConfig struct { 112 // divide the inputs up into the smallest possible unit 113 // for example /* would mean "all top level files or folders" 114 // this being an empty string means "no sharding" 115 GlobPattern string `json:"GlobPattern,omitempty"` 116 // how many "items" are to be processed in each shard 117 // we first apply the glob pattern which will result in a flat list of items 118 // this number decides how to group that flat list into actual shards run by compute nodes 119 BatchSize int `json:"BatchSize,omitempty"` 120 // when using multiple input volumes 121 // what path do we treat as the common mount path to apply the glob pattern to 122 BasePath string `json:"GlobPatternBasePath,omitempty"` 123 } 124 125 // The state of a job across the whole network 126 // generally be in different states on different nodes - one node may be 127 // ignoring a job as its bid was rejected, while another node may be 128 // submitting results for the job to the requester node. 129 // 130 // Each node will produce an array of JobShardState one for each shard 131 // (jobs without a sharding config will still have sharded job 132 // states - just with a shard count of 1). Any code that is determining 133 // the current "state" of a job must look at both: 134 // 135 // - the ShardCount of the JobExecutionPlan 136 // - the collection of JobShardState to determine the current state 137 // 138 // Note: JobState itself is not mutable - the JobExecutionPlan and 139 // JobShardState are updatable and the JobState is queried by the rest 140 // of the system. 141 type JobState struct { 142 Nodes map[string]JobNodeState `json:"Nodes,omitempty"` 143 } 144 145 type JobNodeState struct { 146 Shards map[int]JobShardState `json:"Shards,omitempty"` 147 } 148 149 type JobShardState struct { 150 // which node is running this shard 151 NodeID string `json:"NodeId,omitempty"` 152 // what shard is this we are running 153 ShardIndex int `json:"ShardIndex,omitempty"` 154 // what is the state of the shard on this node 155 State JobStateType `json:"State,omitempty"` 156 // an arbitrary status message 157 Status string `json:"Status,omitempty"` 158 // the proposed results for this shard 159 // this will be resolved by the verifier somehow 160 VerificationProposal []byte `json:"VerificationProposal,omitempty"` 161 VerificationResult VerificationResult `json:"VerificationResult,omitempty"` 162 PublishedResult StorageSpec `json:"PublishedResults,omitempty"` 163 164 // RunOutput of the job 165 RunOutput *RunCommandResult `json:"RunOutput,omitempty"` 166 } 167 168 // The deal the client has made with the bacalhau network. 169 // This is updateable by the client who submitted the job 170 type Deal struct { 171 // The maximum number of concurrent compute node bids that will be 172 // accepted by the requester node on behalf of the client. 173 Concurrency int `json:"Concurrency,omitempty"` 174 // The number of nodes that must agree on a verification result 175 // this is used by the different verifiers - for example the 176 // deterministic verifier requires the winning group size 177 // to be at least this size 178 Confidence int `json:"Confidence,omitempty"` 179 // The minimum number of bids that must be received before the Requester 180 // node will randomly accept concurrency-many of them. This allows the 181 // Requester node to get some level of guarantee that the execution of the 182 // jobs will be spread evenly across the network (assuming that this value 183 // is some large proportion of the size of the network). 184 MinBids int `json:"MinBids,omitempty"` 185 } 186 187 // Spec is a complete specification of a job that can be run on some 188 // execution provider. 189 type Spec struct { 190 // e.g. docker or language 191 Engine Engine `json:"Engine,omitempty"` 192 193 Verifier Verifier `json:"Verifier,omitempty"` 194 195 // there can be multiple publishers for the job 196 Publisher Publisher `json:"Publisher,omitempty"` 197 198 // executor specific data 199 Docker JobSpecDocker `json:"Docker,omitempty"` 200 Language JobSpecLanguage `json:"Language,omitempty"` 201 Wasm JobSpecWasm `json:"Wasm,omitempty"` 202 203 // the compute (cpu, ram) resources this job requires 204 Resources ResourceUsageConfig `json:"Resources,omitempty"` 205 206 // How long a job can run in seconds before it is killed. 207 // This includes the time required to run, verify and publish results 208 Timeout float64 `json:"Timeout,omitempty"` 209 210 // the data volumes we will read in the job 211 // for example "read this ipfs cid" 212 // TODO: #667 Replace with "Inputs", "Outputs" (note the caps) for yaml/json when we update the n.js file 213 Inputs []StorageSpec `json:"inputs,omitempty"` 214 215 // Input volumes that will not be sharded 216 // for example to upload code into a base image 217 // every shard will get the full range of context volumes 218 Contexts []StorageSpec `json:"Contexts,omitempty"` 219 220 // the data volumes we will write in the job 221 // for example "write the results to ipfs" 222 Outputs []StorageSpec `json:"outputs,omitempty"` 223 224 // Annotations on the job - could be user or machine assigned 225 Annotations []string `json:"Annotations,omitempty"` 226 227 // the sharding config for this job 228 // describes how the job might be split up into parallel shards 229 Sharding JobShardingConfig `json:"Sharding,omitempty"` 230 231 // Do not track specified by the client 232 DoNotTrack bool `json:"DoNotTrack,omitempty"` 233 } 234 235 // Return timeout duration 236 func (s *Spec) GetTimeout() time.Duration { 237 return time.Duration(s.Timeout * float64(time.Second)) 238 } 239 240 // for VM style executors 241 type JobSpecDocker struct { 242 // this should be pullable by docker 243 Image string `json:"Image,omitempty"` 244 // optionally override the default entrypoint 245 Entrypoint []string `json:"Entrypoint,omitempty"` 246 // a map of env to run the container with 247 EnvironmentVariables []string `json:"EnvironmentVariables,omitempty"` 248 // working directory inside the container 249 WorkingDirectory string `json:"WorkingDirectory,omitempty"` 250 } 251 252 // for language style executors (can target docker or wasm) 253 type JobSpecLanguage struct { 254 Language string `json:"Language,omitempty"` // e.g. python 255 LanguageVersion string `json:"LanguageVersion,omitempty"` // e.g. 3.8 256 // must this job be run in a deterministic context? 257 Deterministic bool `json:"DeterministicExecution,omitempty"` 258 // context is a tar file stored in ipfs, containing e.g. source code and requirements 259 Context StorageSpec `json:"JobContext,omitempty"` 260 // optional program specified on commandline, like python -c "print(1+1)" 261 Command string `json:"Command,omitempty"` 262 // optional program path relative to the context dir. one of Command or ProgramPath must be specified 263 ProgramPath string `json:"ProgramPath,omitempty"` 264 // optional requirements.txt (or equivalent) path relative to the context dir 265 RequirementsPath string `json:"RequirementsPath,omitempty"` 266 } 267 268 // Describes a raw WASM job 269 type JobSpecWasm struct { 270 // TODO #915: The module that contains the WASM code to start running. 271 // EntryModule StorageSpec `json:"EntryModule,omitempty"` 272 273 // The name of the function in the EntryModule to call to run the job. For 274 // WASI jobs, this will always be `_start`, but jobs can choose to call 275 // other WASM functions instead. The EntryPoint must be a zero-parameter 276 // zero-result function. 277 EntryPoint string `json:"EntryPoint,omitempty"` 278 279 // The arguments supplied to the program (i.e. as ARGV). 280 Parameters []string `json:"Parameters,omitempty"` 281 282 // The variables available in the environment of the running program. 283 EnvironmentVariables map[string]string `json:"EnvironmentVariables,omitempty"` 284 285 // TODO #880: Other WASM modules whose exports will be available as imports 286 // to the EntryModule. 287 ImportModules []StorageSpec `json:"ImportModules,omitempty"` 288 } 289 290 // gives us a way to keep local data against a job 291 // so our compute node and requester node control loops 292 // can keep state against a job without broadcasting it 293 // to the rest of the network 294 type JobLocalEvent struct { 295 EventName JobLocalEventType `json:"EventName,omitempty"` 296 JobID string `json:"JobID,omitempty"` 297 ShardIndex int `json:"ShardIndex,omitempty"` 298 TargetNodeID string `json:"TargetNodeID,omitempty"` 299 } 300 301 // we emit these to other nodes so they update their 302 // state locally and can emit events locally 303 type JobEvent struct { 304 // APIVersion of the Job 305 APIVersion string `json:"APIVersion,omitempty" example:"V1beta1"` 306 307 JobID string `json:"JobID,omitempty" example:"9304c616-291f-41ad-b862-54e133c0149e"` 308 // what shard is this event for 309 ShardIndex int `json:"ShardIndex,omitempty"` 310 // optional clientID if this is an externally triggered event (like create job) 311 ClientID string `json:"ClientID,omitempty" example:"ac13188e93c97a9c2e7cf8e86c7313156a73436036f30da1ececc2ce79f9ea51"` 312 // the node that emitted this event 313 SourceNodeID string `json:"SourceNodeID,omitempty" example:"QmXaXu9N5GNetatsvwnTfQqNtSeKAD6uCmarbh3LMRYAcF"` 314 // the node that this event is for 315 // e.g. "AcceptJobBid" was emitted by Requester but it targeting compute node 316 TargetNodeID string `json:"TargetNodeID,omitempty" example:"QmdZQ7ZbhnvWY1J12XYKGHApJ6aufKyLNSvf8jZBrBaAVL"` 317 EventName JobEventType `json:"EventName,omitempty"` 318 // this is only defined in "create" events 319 Spec Spec `json:"Spec,omitempty"` 320 // this is only defined in "create" events 321 JobExecutionPlan JobExecutionPlan `json:"JobExecutionPlan,omitempty"` 322 // this is only defined in "update_deal" events 323 Deal Deal `json:"Deal,omitempty"` 324 Status string `json:"Status,omitempty" example:"Got results proposal of length: 0"` 325 VerificationProposal []byte `json:"VerificationProposal,omitempty"` 326 VerificationResult VerificationResult `json:"VerificationResult,omitempty"` 327 PublishedResult StorageSpec `json:"PublishedResult,omitempty"` 328 329 EventTime time.Time `json:"EventTime,omitempty" example:"2022-11-17T13:32:55.756658941Z"` 330 SenderPublicKey PublicKey `json:"SenderPublicKey,omitempty"` 331 332 // RunOutput of the job 333 RunOutput *RunCommandResult `json:"RunOutput,omitempty"` 334 } 335 336 // we need to use a struct for the result because: 337 // a) otherwise we don't know if VerificationResult==false 338 // means "I've not verified yet" or "verification failed" 339 // b) we might want to add further fields to the result later 340 type VerificationResult struct { 341 Complete bool `json:"Complete,omitempty"` 342 Result bool `json:"Result,omitempty"` 343 } 344 345 type JobCreatePayload struct { 346 // the id of the client that is submitting the job 347 ClientID string `json:"ClientID,omitempty" validate:"required"` 348 349 // The job specification: 350 Job *Job `json:"Job,omitempty" validate:"required"` 351 352 // Optional base64-encoded tar file that will be pinned to IPFS and 353 // mounted as storage for the job. Not part of the spec so we don't 354 // flood the transport layer with it (potentially very large). 355 Context string `json:"Context,omitempty" validate:"optional"` 356 }