github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/pkg/model/v1alpha1/job.go (about)

     1  package v1alpha1
     2  
     3  import (
     4  	"time"
     5  
     6  	"github.com/imdario/mergo"
     7  )
     8  
// Job contains data about a job request in the bacalhau network.
//
// Some JSON keys differ from the Go field names: State is serialized as
// "JobState", Events as "JobEvents" and LocalEvents as "LocalJobEvents".
//
// NOTE(review): with the standard encoding/json package `omitempty` has no
// effect on struct values, so Spec, Deal, ExecutionPlan, State and CreatedAt
// are emitted even when they hold their zero value.
type Job struct {
	// The API version of this job. NewJob fills it in from APIVersionLatest().
	// This key has no omitempty, so it is always present in the JSON output.
	// NOTE(review): the example tag says "V1beta1" although this file is
	// package v1alpha1 - confirm which value is intended.
	APIVersion string `json:"APIVersion" example:"V1beta1"`

	// The unique global ID of this job in the bacalhau network.
	// This is also the value returned by Job.String.
	ID string `json:"ID,omitempty" example:"92d5d4ee-3765-4f78-8353-623f5f26df08"`

	// The ID of the requester node that owns this job.
	RequesterNodeID string `json:"RequesterNodeID,omitempty" example:"QmXaXu9N5GNetatsvwnTfQqNtSeKAD6uCmarbh3LMRYAcF"`

	// The public key of the Requester node that created this job.
	// This can be used to encrypt messages back to the creator.
	RequesterPublicKey PublicKey `json:"RequesterPublicKey,omitempty"`

	// The ID of the client that created this job.
	ClientID string `json:"ClientID,omitempty" example:"ac13188e93c97a9c2e7cf8e86c7313156a73436036f30da1ececc2ce79f9ea51"`

	// The specification of this job.
	Spec Spec `json:"Spec,omitempty"`

	// The deal the client has made, such as which job bids they have accepted.
	Deal Deal `json:"Deal,omitempty"`

	// How this job will be executed by nodes on the network.
	ExecutionPlan JobExecutionPlan `json:"ExecutionPlan,omitempty"`

	// Time the job was submitted to the bacalhau network.
	CreatedAt time.Time `json:"CreatedAt,omitempty" example:"2022-11-17T13:29:01.871140291Z"`

	// The current state of the job (JSON key "JobState").
	State JobState `json:"JobState,omitempty"`

	// All events associated with the job (JSON key "JobEvents").
	Events []JobEvent `json:"JobEvents,omitempty"`

	// All local events associated with the job (JSON key "LocalJobEvents").
	LocalEvents []JobLocalEvent `json:"LocalJobEvents,omitempty"`
}
    47  
    48  func (job Job) String() string {
    49  	return job.ID
    50  }
    51  
    52  // TODO: There's probably a better way we want to globally version APIs
    53  func NewJob() *Job {
    54  	return &Job{
    55  		APIVersion: APIVersionLatest().String(),
    56  	}
    57  }
    58  
    59  func NewJobWithSaneProductionDefaults() (*Job, error) {
    60  	j := NewJob()
    61  	err := mergo.Merge(j, &Job{
    62  		APIVersion: APIVersionLatest().String(),
    63  		Spec: Spec{
    64  			Engine:    EngineDocker,
    65  			Verifier:  VerifierNoop,
    66  			Publisher: PublisherEstuary,
    67  		},
    68  		Deal: Deal{
    69  			Concurrency: 1,
    70  			Confidence:  0,
    71  			MinBids:     0, // 0 means no minimum before bidding
    72  		},
    73  	})
    74  	if err != nil {
    75  		return nil, err
    76  	}
    77  	return j, nil
    78  }
    79  
// JobWithInfo is the job request + the result of attempting to run it on the
// network: the job itself together with its state, events and local events.
//
// Note that the local events are serialized here under the JSON key
// "JobLocalEvents", whereas Job.LocalEvents uses "LocalJobEvents".
type JobWithInfo struct {
	Job            Job             `json:"Job,omitempty"`
	JobState       JobState        `json:"JobState,omitempty"`
	JobEvents      []JobEvent      `json:"JobEvents,omitempty"`
	JobLocalEvents []JobLocalEvent `json:"JobLocalEvents,omitempty"`
}
    87  
// JobShard contains data about a job shard in the bacalhau network.
type JobShard struct {
	// The job this shard belongs to. The ID and String methods dereference
	// this pointer, so it must be non-nil before they are called.
	Job *Job `json:"Job,omitempty"`

	// The index of this shard within the job. Because of omitempty, an index
	// of 0 is left out of the JSON output.
	Index int `json:"Index,omitempty"`
}
    94  
    95  func (shard JobShard) ID() string {
    96  	return GetShardID(shard.Job.ID, shard.Index)
    97  }
    98  
    99  func (shard JobShard) String() string {
   100  	return shard.ID()
   101  }
   102  
// JobExecutionPlan describes how a job is executed by nodes on the network.
type JobExecutionPlan struct {
	// How many shards there are in total for this job.
	// We are expecting this number x concurrency total
	// JobShardState objects for this job.
	// Note that the JSON key is "ShardsTotal", not "TotalShards".
	TotalShards int `json:"ShardsTotal,omitempty"`
}
   109  
// JobShardingConfig describes how we chunk a job up into shards.
type JobShardingConfig struct {
	// Divides the inputs up into the smallest possible unit,
	// for example /* would mean "all top level files or folders".
	// This being an empty string means "no sharding".
	GlobPattern string `json:"GlobPattern,omitempty"`
	// How many "items" are to be processed in each shard.
	// We first apply the glob pattern, which will result in a flat list of items;
	// this number decides how to group that flat list into actual shards run by compute nodes.
	BatchSize int `json:"BatchSize,omitempty"`
	// When using multiple input volumes, the path we treat as the common
	// mount path to apply the glob pattern to.
	// Note that the JSON key is "GlobPatternBasePath".
	BasePath string `json:"GlobPatternBasePath,omitempty"`
}
   124  
// JobState is the state of a job across the whole network. A job will
// generally be in different states on different nodes - one node may be
// ignoring a job as its bid was rejected, while another node may be
// submitting results for the job to the requester node.
//
// Each node will produce an array of JobShardState one for each shard
// (jobs without a sharding config will still have sharded job
// states - just with a shard count of 1). Any code that is determining
// the current "state" of a job must look at both:
//
//   - the TotalShards of the JobExecutionPlan
//   - the collection of JobShardState to determine the current state
//
// Note: JobState itself is not mutable - the JobExecutionPlan and
// JobShardState are updatable and the JobState is queried by the rest
// of the system.
type JobState struct {
	// Per-node state of the job.
	// NOTE(review): presumably keyed by node ID - confirm against the code
	// that populates this map.
	Nodes map[string]JobNodeState `json:"Nodes,omitempty"`
}
   144  
// JobNodeState holds the shard states recorded for a single node; it is the
// value type of JobState.Nodes.
type JobNodeState struct {
	// State of each shard on this node.
	// NOTE(review): presumably keyed by shard index - confirm against the
	// code that populates this map.
	Shards map[int]JobShardState `json:"Shards,omitempty"`
}
   148  
// JobShardState is the state of one shard of a job on one node.
type JobShardState struct {
	// which node is running this shard
	// (note the JSON key is "NodeId", not "NodeID")
	NodeID string `json:"NodeId,omitempty"`
	// what shard is this we are running
	ShardIndex int `json:"ShardIndex,omitempty"`
	// what is the state of the shard on this node
	State JobStateType `json:"State,omitempty"`
	// an arbitrary status message
	Status string `json:"Status,omitempty"`
	// VerificationProposal holds the proposed results for this shard;
	// this will be resolved by the verifier somehow.
	// VerificationResult and PublishedResult follow in the same group; note
	// that PublishedResult uses the plural JSON key "PublishedResults" here,
	// while JobEvent.PublishedResult uses "PublishedResult".
	VerificationProposal []byte             `json:"VerificationProposal,omitempty"`
	VerificationResult   VerificationResult `json:"VerificationResult,omitempty"`
	PublishedResult      StorageSpec        `json:"PublishedResults,omitempty"`

	// RunOutput of the job. Being a pointer, it is omitted from the JSON
	// output when nil.
	RunOutput *RunCommandResult `json:"RunOutput,omitempty"`
}
   167  
// Deal is the deal the client has made with the bacalhau network.
// This is updateable by the client who submitted the job.
type Deal struct {
	// The maximum number of concurrent compute node bids that will be
	// accepted by the requester node on behalf of the client.
	// NewJobWithSaneProductionDefaults sets this to 1.
	Concurrency int `json:"Concurrency,omitempty"`
	// The number of nodes that must agree on a verification result.
	// This is used by the different verifiers - for example the
	// deterministic verifier requires the winning group size
	// to be at least this size.
	Confidence int `json:"Confidence,omitempty"`
	// The minimum number of bids that must be received before the Requester
	// node will randomly accept concurrency-many of them. This allows the
	// Requester node to get some level of guarantee that the execution of the
	// jobs will be spread evenly across the network (assuming that this value
	// is some large proportion of the size of the network).
	// A value of 0 means there is no minimum before bidding.
	MinBids int `json:"MinBids,omitempty"`
}
   186  
// Spec is a complete specification of a job that can be run on some
// execution provider.
//
// Note that Inputs and Outputs are serialized under lowercase JSON keys
// ("inputs", "outputs") while every other field uses a capitalized key.
type Spec struct {
	// The engine to run the job with, e.g. docker or language.
	Engine Engine `json:"Engine,omitempty"`

	// The verifier to use for the job's results.
	Verifier Verifier `json:"Verifier,omitempty"`

	// The publisher for the job's results.
	// NOTE(review): the original comment says "there can be multiple
	// publishers for the job", but this field holds a single Publisher value.
	Publisher Publisher `json:"Publisher,omitempty"`

	// executor specific data
	Docker   JobSpecDocker   `json:"Docker,omitempty"`
	Language JobSpecLanguage `json:"Language,omitempty"`
	Wasm     JobSpecWasm     `json:"Wasm,omitempty"`

	// the compute (cpu, ram) resources this job requires
	Resources ResourceUsageConfig `json:"Resources,omitempty"`

	// How long a job can run in seconds before it is killed.
	// This includes the time required to run, verify and publish results.
	// GetTimeout returns this value as a time.Duration.
	Timeout float64 `json:"Timeout,omitempty"`

	// the data volumes we will read in the job
	// for example "read this ipfs cid"
	// TODO: #667 Replace with "Inputs", "Outputs" (note the caps) for yaml/json when we update the n.js file
	Inputs []StorageSpec `json:"inputs,omitempty"`

	// Input volumes that will not be sharded
	// for example to upload code into a base image
	// every shard will get the full range of context volumes
	Contexts []StorageSpec `json:"Contexts,omitempty"`

	// the data volumes we will write in the job
	// for example "write the results to ipfs"
	Outputs []StorageSpec `json:"outputs,omitempty"`

	// Annotations on the job - could be user or machine assigned
	Annotations []string `json:"Annotations,omitempty"`

	// the sharding config for this job
	// describes how the job might be split up into parallel shards
	Sharding JobShardingConfig `json:"Sharding,omitempty"`

	// Do not track specified by the client
	DoNotTrack bool `json:"DoNotTrack,omitempty"`
}
   234  
   235  // Return timeout duration
   236  func (s *Spec) GetTimeout() time.Duration {
   237  	return time.Duration(s.Timeout * float64(time.Second))
   238  }
   239  
// JobSpecDocker holds the executor-specific settings for VM style executors.
type JobSpecDocker struct {
	// this should be pullable by docker
	Image string `json:"Image,omitempty"`
	// optionally override the default entrypoint
	Entrypoint []string `json:"Entrypoint,omitempty"`
	// a list (not a map) of environment variables to run the container with
	// NOTE(review): presumably each entry is of the form "KEY=VALUE" - confirm
	// against the docker executor.
	EnvironmentVariables []string `json:"EnvironmentVariables,omitempty"`
	// working directory inside the container
	WorkingDirectory string `json:"WorkingDirectory,omitempty"`
}
   251  
// JobSpecLanguage holds the executor-specific settings for language style
// executors (can target docker or wasm).
type JobSpecLanguage struct {
	Language        string `json:"Language,omitempty"`        // e.g. python
	LanguageVersion string `json:"LanguageVersion,omitempty"` // e.g. 3.8
	// must this job be run in a deterministic context?
	// (JSON key is "DeterministicExecution")
	Deterministic bool `json:"DeterministicExecution,omitempty"`
	// context is a tar file stored in ipfs, containing e.g. source code and requirements
	// (JSON key is "JobContext")
	Context StorageSpec `json:"JobContext,omitempty"`
	// optional program specified on commandline, like python -c "print(1+1)"
	Command string `json:"Command,omitempty"`
	// optional program path relative to the context dir. one of Command or ProgramPath must be specified
	ProgramPath string `json:"ProgramPath,omitempty"`
	// optional requirements.txt (or equivalent) path relative to the context dir
	RequirementsPath string `json:"RequirementsPath,omitempty"`
}
   267  
// JobSpecWasm describes a raw WASM job.
type JobSpecWasm struct {
	// TODO #915: The module that contains the WASM code to start running.
	// EntryModule StorageSpec `json:"EntryModule,omitempty"`

	// The name of the function in the EntryModule to call to run the job. For
	// WASI jobs, this will always be `_start`, but jobs can choose to call
	// other WASM functions instead. The EntryPoint must be a zero-parameter
	// zero-result function.
	EntryPoint string `json:"EntryPoint,omitempty"`

	// The arguments supplied to the program (i.e. as ARGV).
	Parameters []string `json:"Parameters,omitempty"`

	// The variables available in the environment of the running program,
	// as a map from variable name to value (unlike JobSpecDocker, which
	// uses a []string for its EnvironmentVariables).
	EnvironmentVariables map[string]string `json:"EnvironmentVariables,omitempty"`

	// TODO #880: Other WASM modules whose exports will be available as imports
	// to the EntryModule.
	ImportModules []StorageSpec `json:"ImportModules,omitempty"`
}
   289  
// JobLocalEvent gives us a way to keep local data against a job
// so our compute node and requester node control loops
// can keep state against a job without broadcasting it
// to the rest of the network.
//
// Each event records its type (EventName), the job and shard index it refers
// to, and a target node ID.
type JobLocalEvent struct {
	EventName    JobLocalEventType `json:"EventName,omitempty"`
	JobID        string            `json:"JobID,omitempty"`
	ShardIndex   int               `json:"ShardIndex,omitempty"`
	TargetNodeID string            `json:"TargetNodeID,omitempty"`
}
   300  
// JobEvent is what we emit to other nodes so they update their
// state locally and can emit events locally.
type JobEvent struct {
	// APIVersion of the Job
	// NOTE(review): the example tag says "V1beta1" although this file is
	// package v1alpha1 - confirm which value is intended.
	APIVersion string `json:"APIVersion,omitempty" example:"V1beta1"`

	// the ID of the job this event belongs to
	JobID string `json:"JobID,omitempty" example:"9304c616-291f-41ad-b862-54e133c0149e"`
	// what shard is this event for
	ShardIndex int `json:"ShardIndex,omitempty"`
	// optional clientID if this is an externally triggered event (like create job)
	ClientID string `json:"ClientID,omitempty" example:"ac13188e93c97a9c2e7cf8e86c7313156a73436036f30da1ececc2ce79f9ea51"`
	// the node that emitted this event
	SourceNodeID string `json:"SourceNodeID,omitempty" example:"QmXaXu9N5GNetatsvwnTfQqNtSeKAD6uCmarbh3LMRYAcF"`
	// TargetNodeID is the node that this event is for,
	// e.g. "AcceptJobBid" was emitted by the Requester but is targeting a compute node.
	// EventName is the type of this event.
	TargetNodeID string       `json:"TargetNodeID,omitempty" example:"QmdZQ7ZbhnvWY1J12XYKGHApJ6aufKyLNSvf8jZBrBaAVL"`
	EventName    JobEventType `json:"EventName,omitempty"`
	// this is only defined in "create" events
	Spec Spec `json:"Spec,omitempty"`
	// this is only defined in "create" events
	JobExecutionPlan JobExecutionPlan `json:"JobExecutionPlan,omitempty"`
	// Deal is only defined in "update_deal" events.
	// NOTE(review): that restriction presumably applies to Deal alone and not
	// to the Status / verification / published-result fields grouped with it
	// below - confirm.
	Deal                 Deal               `json:"Deal,omitempty"`
	Status               string             `json:"Status,omitempty" example:"Got results proposal of length: 0"`
	VerificationProposal []byte             `json:"VerificationProposal,omitempty"`
	VerificationResult   VerificationResult `json:"VerificationResult,omitempty"`
	PublishedResult      StorageSpec        `json:"PublishedResult,omitempty"`

	// EventTime is the timestamp of the event; SenderPublicKey is the public
	// key of the event's sender.
	EventTime       time.Time `json:"EventTime,omitempty" example:"2022-11-17T13:32:55.756658941Z"`
	SenderPublicKey PublicKey `json:"SenderPublicKey,omitempty"`

	// RunOutput of the job. Being a pointer, it is omitted from the JSON
	// output when nil.
	RunOutput *RunCommandResult `json:"RunOutput,omitempty"`
}
   335  
// VerificationResult is the outcome of verifying a shard's results.
//
// We need to use a struct for the result because:
// a) otherwise we don't know if VerificationResult==false
// means "I've not verified yet" or "verification failed"
// b) we might want to add further fields to the result later
type VerificationResult struct {
	// Complete and Result together distinguish "not verified yet" from
	// "verification failed", as described above. Both use omitempty, so a
	// false value is left out of the JSON output.
	Complete bool `json:"Complete,omitempty"`
	Result   bool `json:"Result,omitempty"`
}
   344  
// JobCreatePayload carries a job being submitted by a client: the client's
// ID, the job itself and an optional context archive.
//
// NOTE(review): the `validate` tags ("required" / "optional") are consumed by
// tooling outside this file - confirm which library interprets them.
type JobCreatePayload struct {
	// the id of the client that is submitting the job
	ClientID string `json:"ClientID,omitempty" validate:"required"`

	// The job specification:
	Job *Job `json:"Job,omitempty" validate:"required"`

	// Optional base64-encoded tar file that will be pinned to IPFS and
	// mounted as storage for the job. Not part of the spec so we don't
	// flood the transport layer with it (potentially very large).
	Context string `json:"Context,omitempty" validate:"optional"`
}