github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/pkg/model/v1beta1/job.go (about)

     1  package v1beta1
     2  
     3  import (
     4  	"time"
     5  
     6  	"github.com/imdario/mergo"
     7  	"k8s.io/apimachinery/pkg/selection"
     8  )
     9  
// Job contains data about a job request in the bacalhau network: the API
// version it was written against, its identifying metadata, its specification
// and its current status.
type Job struct {
	// APIVersion is the version of the job model. NewJob fills this in with
	// APIVersionLatest().String().
	APIVersion string `json:"APIVersion" example:"V1beta1"`

	// Metadata identifies the job (ID, creation time, submitting client).
	Metadata Metadata `json:"Metadata,omitempty"`

	// The specification of this job.
	Spec Spec `json:"Spec,omitempty"`

	// The status of the job: where are the nodes at, what are the events
	Status JobStatus `json:"Status,omitempty"`
}
    22  
// Metadata holds the identifying information of a job: its ID, the time it
// was submitted and the client that created it.
type Metadata struct {
	// The unique global ID of this job in the bacalhau network.
	ID string `json:"ID,omitempty" example:"92d5d4ee-3765-4f78-8353-623f5f26df08"`

	// Time the job was submitted to the bacalhau network.
	CreatedAt time.Time `json:"CreatedAt,omitempty" example:"2022-11-17T13:29:01.871140291Z"`

	// The ID of the client that created this job.
	ClientID string `json:"ClientID,omitempty" example:"ac13188e93c97a9c2e7cf8e86c7313156a73436036f30da1ececc2ce79f9ea51"`
}
// JobRequester identifies the requester node that owns a job and carries the
// public key of that node.
type JobRequester struct {
	// The ID of the requester node that owns this job.
	RequesterNodeID string `json:"RequesterNodeID,omitempty" example:"QmXaXu9N5GNetatsvwnTfQqNtSeKAD6uCmarbh3LMRYAcF"`

	// The public key of the Requester node that created this job
	// This can be used to encrypt messages back to the creator
	RequesterPublicKey PublicKey `json:"RequesterPublicKey,omitempty"`
}
// JobStatus groups everything known about the progress of a job: its current
// state, the events recorded for it and the requester node that owns it.
//
// Note that the JSON names differ from the Go field names: State is
// serialized as "JobState", Events as "JobEvents" and LocalEvents as
// "LocalJobEvents".
type JobStatus struct {
	// The current state of the job
	State JobState `json:"JobState,omitempty"`

	// All events associated with the job
	Events []JobEvent `json:"JobEvents,omitempty"`

	// All local events associated with the job
	LocalEvents []JobLocalEvent `json:"LocalJobEvents,omitempty"`

	// The requester node that owns the job, together with its public key.
	Requester JobRequester `json:"Requester,omitempty"`
}
    53  
    54  // TODO: There's probably a better way we want to globally version APIs
    55  func NewJob() *Job {
    56  	return &Job{
    57  		APIVersion: APIVersionLatest().String(),
    58  	}
    59  }
    60  
    61  func NewJobWithSaneProductionDefaults() (*Job, error) {
    62  	j := NewJob()
    63  	err := mergo.Merge(j, &Job{
    64  		APIVersion: APIVersionLatest().String(),
    65  		Spec: Spec{
    66  			Engine:    EngineDocker,
    67  			Verifier:  VerifierNoop,
    68  			Publisher: PublisherEstuary,
    69  			Deal: Deal{
    70  				Concurrency: 1,
    71  				Confidence:  0,
    72  				MinBids:     0, // 0 means no minimum before bidding
    73  			},
    74  		},
    75  	})
    76  	if err != nil {
    77  		return nil, err
    78  	}
    79  	return j, nil
    80  }
    81  
// JobWithInfo is the job request + the result of attempting to run it on the network:
// the job itself, its state, and the events and local events recorded for it.
type JobWithInfo struct {
	Job            Job             `json:"Job,omitempty"`
	JobState       JobState        `json:"JobState,omitempty"`
	JobEvents      []JobEvent      `json:"JobEvents,omitempty"`
	JobLocalEvents []JobLocalEvent `json:"JobLocalEvents,omitempty"`
}
    89  
// JobShard contains data about a job shard in the bacalhau network: the job
// the shard belongs to and the index of the shard within that job.
type JobShard struct {
	// Job is the job this shard is part of. It is a pointer, so it may be nil;
	// the ID and String methods dereference it without checking.
	Job *Job `json:"Job,omitempty"`

	// Index is the index of this shard. Because of omitempty, an index of 0
	// is left out of the JSON encoding.
	Index int `json:"Index,omitempty"`
}
    96  
    97  func (shard JobShard) ID() string {
    98  	return GetShardID(shard.Job.Metadata.ID, shard.Index)
    99  }
   100  
   101  func (shard JobShard) String() string {
   102  	return shard.ID()
   103  }
   104  
// JobExecutionPlan describes how a job is executed by nodes on the network.
// At present it only records the total number of shards.
type JobExecutionPlan struct {
	// how many shards are there in total for this job
	// we are expecting this number x concurrency total
	// JobShardState objects for this job
	// (serialized under the JSON name "ShardsTotal")
	TotalShards int `json:"ShardsTotal,omitempty"`
}
   111  
// JobShardingConfig describes how we chunk a job up into shards.
type JobShardingConfig struct {
	// divide the inputs up into the smallest possible unit
	// for example /* would mean "all top level files or folders"
	// this being an empty string means "no sharding"
	GlobPattern string `json:"GlobPattern,omitempty"`
	// how many "items" are to be processed in each shard
	// we first apply the glob pattern which will result in a flat list of items
	// this number decides how to group that flat list into actual shards run by compute nodes
	BatchSize int `json:"BatchSize,omitempty"`
	// when using multiple input volumes
	// what path do we treat as the common mount path to apply the glob pattern to
	// (serialized under the JSON name "GlobPatternBasePath")
	BasePath string `json:"GlobPatternBasePath,omitempty"`
}
   126  
// JobState is the state of a job across the whole network. A job will
// generally be in different states on different nodes - one node may be
// ignoring a job as its bid was rejected, while another node may be
// submitting results for the job to the requester node.
//
// Each node will produce an array of JobShardState one for each shard
// (jobs without a sharding config will still have sharded job
// states - just with a shard count of 1). Any code that is determining
// the current "state" of a job must look at both:
//
//   - the TotalShards of the JobExecutionPlan
//   - the collection of JobShardState to determine the current state
//
// Note: JobState itself is not mutable - the JobExecutionPlan and
// JobShardState are updatable and the JobState is queried by the rest
// of the system.
type JobState struct {
	// Nodes holds the per-node state of the job.
	// NOTE(review): the map key is presumably the node ID - confirm against callers.
	Nodes map[string]JobNodeState `json:"Nodes,omitempty"`
}
   146  
// JobNodeState is the state of a job on a single node, expressed as the state
// of each shard on that node.
type JobNodeState struct {
	// Shards holds one JobShardState per shard.
	// NOTE(review): the map key is presumably the shard index - confirm against callers.
	Shards map[int]JobShardState `json:"Shards,omitempty"`
}
   150  
// JobShardState is the state of a single shard of a job on a single node,
// including any verification proposal, verification result, published result
// and run output recorded for it.
//
// Note the JSON names: NodeID is serialized as "NodeId", ExecutionID as
// "ExecutionId" and PublishedResult as "PublishedResults" (plural).
type JobShardState struct {
	// which node is running this shard
	NodeID string `json:"NodeId,omitempty"`
	// Compute node reference for this shard execution
	ExecutionID string `json:"ExecutionId,omitempty"`
	// what shard is this we are running
	ShardIndex int `json:"ShardIndex,omitempty"`
	// what is the state of the shard on this node
	State JobStateType `json:"State,omitempty"`
	// an arbitrary status message
	Status string `json:"Status,omitempty"`
	// the proposed results for this shard
	// this will be resolved by the verifier somehow
	VerificationProposal []byte             `json:"VerificationProposal,omitempty"`
	VerificationResult   VerificationResult `json:"VerificationResult,omitempty"`
	PublishedResult      StorageSpec        `json:"PublishedResults,omitempty"`

	// RunOutput of the job; a nil pointer is omitted from the JSON encoding.
	RunOutput *RunCommandResult `json:"RunOutput,omitempty"`
}
   171  
// Deal is the deal the client has made with the bacalhau network.
// This is updateable by the client who submitted the job.
type Deal struct {
	// The maximum number of concurrent compute node bids that will be
	// accepted by the requester node on behalf of the client.
	Concurrency int `json:"Concurrency,omitempty"`
	// The number of nodes that must agree on a verification result
	// this is used by the different verifiers - for example the
	// deterministic verifier requires the winning group size
	// to be at least this size
	Confidence int `json:"Confidence,omitempty"`
	// The minimum number of bids that must be received before the Requester
	// node will randomly accept concurrency-many of them. This allows the
	// Requester node to get some level of guarantee that the execution of the
	// jobs will be spread evenly across the network (assuming that this value
	// is some large proportion of the size of the network).
	MinBids int `json:"MinBids,omitempty"`
}
   190  
// LabelSelectorRequirement is a selector that contains values, a key, and an
// operator that relates the key and values.
//
// These are based on the labels library from the kubernetes package. We use
// labels.Requirement to represent the label selector requirements given in the
// command line arguments (the library supports multiple parsing formats) and
// when matching selectors to labels (that is what the library expects).
// labels.Requirements are not serializable, however, so we need to convert
// them to LabelSelectorRequirements.
type LabelSelectorRequirement struct {
	// key is the label key that the selector applies to.
	Key string `json:"Key"`
	// operator represents a key's relationship to a set of values.
	// Valid operators are In, NotIn, Exists and DoesNotExist.
	Operator selection.Operator `json:"Operator"`
	// values is an array of string values. If the operator is In or NotIn,
	// the values array must be non-empty. If the operator is Exists or DoesNotExist,
	// the values array must be empty. This array is replaced during a strategic
	// merge patch (wording inherited from the upstream Kubernetes type).
	Values []string `json:"Values,omitempty"`
}
   206  
// Spec is a complete specification of a job that can be run on some
// execution provider.
type Spec struct {
	// e.g. docker or language
	Engine Engine `json:"Engine,omitempty"`

	// the verifier to use for this job
	Verifier Verifier `json:"Verifier,omitempty"`

	// the publisher for the job
	// NOTE(review): an earlier comment said "there can be multiple publishers
	// for the job", but this field holds a single Publisher value.
	Publisher Publisher `json:"Publisher,omitempty"`

	// executor specific data
	Docker   JobSpecDocker   `json:"Docker,omitempty"`
	Language JobSpecLanguage `json:"Language,omitempty"`
	Wasm     JobSpecWasm     `json:"Wasm,omitempty"`

	// the compute (cpu, ram) resources this job requires
	Resources ResourceUsageConfig `json:"Resources,omitempty"`

	// The type of networking access that the job needs
	Network NetworkConfig `json:"Network,omitempty"`

	// How long a job can run in seconds before it is killed.
	// This includes the time required to run, verify and publish results.
	// Use GetTimeout to obtain this value as a time.Duration.
	Timeout float64 `json:"Timeout,omitempty"`

	// the data volumes we will read in the job
	// for example "read this ipfs cid"
	// Note: the JSON name is lowercase ("inputs"), unlike most other fields.
	// TODO: #667 Replace with "Inputs", "Outputs" (note the caps) for yaml/json when we update the n.js file
	Inputs []StorageSpec `json:"inputs,omitempty"`

	// Input volumes that will not be sharded
	// for example to upload code into a base image
	// every shard will get the full range of context volumes
	Contexts []StorageSpec `json:"Contexts,omitempty"`

	// the data volumes we will write in the job
	// for example "write the results to ipfs"
	// Note: the JSON name is lowercase ("outputs"), see the TODO on Inputs.
	Outputs []StorageSpec `json:"outputs,omitempty"`

	// Annotations on the job - could be user or machine assigned
	Annotations []string `json:"Annotations,omitempty"`

	// NodeSelectors is a selector which must be true for the compute node to run this job.
	NodeSelectors []LabelSelectorRequirement `json:"NodeSelectors,omitempty"`
	// the sharding config for this job
	// describes how the job might be split up into parallel shards
	Sharding JobShardingConfig `json:"Sharding,omitempty"`

	// Do not track specified by the client
	DoNotTrack bool `json:"DoNotTrack,omitempty"`

	// how will this job be executed by nodes on the network
	ExecutionPlan JobExecutionPlan `json:"ExecutionPlan,omitempty"`

	// The deal the client has made, such as which job bids they have accepted.
	Deal Deal `json:"Deal,omitempty"`
}
   265  
   266  // Return timeout duration
   267  func (s *Spec) GetTimeout() time.Duration {
   268  	return time.Duration(s.Timeout * float64(time.Second))
   269  }
   270  
   271  // Return pointers to all the storage specs in the spec.
   272  func (s *Spec) AllStorageSpecs() []*StorageSpec {
   273  	storages := []*StorageSpec{
   274  		&s.Language.Context,
   275  		&s.Wasm.EntryModule,
   276  	}
   277  
   278  	for _, collection := range [][]StorageSpec{
   279  		s.Contexts,
   280  		s.Inputs,
   281  		s.Outputs,
   282  	} {
   283  		for index := range collection {
   284  			storages = append(storages, &collection[index])
   285  		}
   286  	}
   287  
   288  	return storages
   289  }
   290  
// JobSpecDocker holds the executor specific data for VM style executors.
type JobSpecDocker struct {
	// this should be pullable by docker
	Image string `json:"Image,omitempty"`
	// optionally override the default entrypoint
	Entrypoint []string `json:"Entrypoint,omitempty"`
	// a list of environment variables to run the container with
	// NOTE(review): the expected format of each entry (e.g. KEY=VALUE) is not
	// visible here - confirm against the docker executor.
	EnvironmentVariables []string `json:"EnvironmentVariables,omitempty"`
	// working directory inside the container
	WorkingDirectory string `json:"WorkingDirectory,omitempty"`
}
   302  
// JobSpecLanguage holds the executor specific data for language style
// executors (can target docker or wasm).
type JobSpecLanguage struct {
	Language        string `json:"Language,omitempty"`        // e.g. python
	LanguageVersion string `json:"LanguageVersion,omitempty"` // e.g. 3.8
	// must this job be run in a deterministic context?
	// (serialized under the JSON name "DeterministicExecution")
	Deterministic bool `json:"DeterministicExecution,omitempty"`
	// context is a tar file stored in ipfs, containing e.g. source code and requirements
	// (serialized under the JSON name "JobContext")
	Context StorageSpec `json:"JobContext,omitempty"`
	// optional program specified on commandline, like python -c "print(1+1)"
	Command string `json:"Command,omitempty"`
	// optional program path relative to the context dir. one of Command or ProgramPath must be specified
	ProgramPath string `json:"ProgramPath,omitempty"`
	// optional requirements.txt (or equivalent) path relative to the context dir
	RequirementsPath string `json:"RequirementsPath,omitempty"`
}
   318  
// JobSpecWasm describes a raw WASM job: the module to run, the function to
// call in it, and the arguments and environment made available to it.
type JobSpecWasm struct {
	// The module that contains the WASM code to start running.
	EntryModule StorageSpec `json:"EntryModule,omitempty"`

	// The name of the function in the EntryModule to call to run the job. For
	// WASI jobs, this will always be `_start`, but jobs can choose to call
	// other WASM functions instead. The EntryPoint must be a zero-parameter
	// zero-result function.
	EntryPoint string `json:"EntryPoint,omitempty"`

	// The arguments supplied to the program (i.e. as ARGV).
	Parameters []string `json:"Parameters,omitempty"`

	// The variables available in the environment of the running program.
	EnvironmentVariables map[string]string `json:"EnvironmentVariables,omitempty"`

	// TODO #880: Other WASM modules whose exports will be available as imports
	// to the EntryModule.
	ImportModules []StorageSpec `json:"ImportModules,omitempty"`
}
   340  
// JobLocalEvent gives us a way to keep local data against a job
// so our compute node and requester node control loops
// can keep state against a job without broadcasting it
// to the rest of the network.
//
// Each event names what happened (EventName) and the job, shard index and
// target node it relates to.
type JobLocalEvent struct {
	EventName    JobLocalEventType `json:"EventName,omitempty"`
	JobID        string            `json:"JobID,omitempty"`
	ShardIndex   int               `json:"ShardIndex,omitempty"`
	TargetNodeID string            `json:"TargetNodeID,omitempty"`
}
   351  
// JobEvent is an event about a job. We emit these to other nodes so they
// update their state locally and can emit events locally.
//
// Several fields are only meaningful for particular event types, as noted on
// the individual fields.
type JobEvent struct {
	// APIVersion of the Job
	APIVersion string `json:"APIVersion,omitempty" example:"V1beta1"`

	// the ID of the job this event is about
	JobID string `json:"JobID,omitempty" example:"9304c616-291f-41ad-b862-54e133c0149e"`
	// what shard is this event for
	ShardIndex int `json:"ShardIndex,omitempty"`
	// compute execution identifier
	ExecutionID string `json:"ExecutionID,omitempty" example:"9304c616-291f-41ad-b862-54e133c0149e"`
	// optional clientID if this is an externally triggered event (like create job)
	ClientID string `json:"ClientID,omitempty" example:"ac13188e93c97a9c2e7cf8e86c7313156a73436036f30da1ececc2ce79f9ea51"`
	// the node that emitted this event
	SourceNodeID string `json:"SourceNodeID,omitempty" example:"QmXaXu9N5GNetatsvwnTfQqNtSeKAD6uCmarbh3LMRYAcF"`
	// the node that this event is for
	// e.g. "AcceptJobBid" was emitted by the Requester but is targeting a compute node
	TargetNodeID string       `json:"TargetNodeID,omitempty" example:"QmdZQ7ZbhnvWY1J12XYKGHApJ6aufKyLNSvf8jZBrBaAVL"`
	EventName    JobEventType `json:"EventName,omitempty"`
	// this is only defined in "create" events
	Spec Spec `json:"Spec,omitempty"`
	// this is only defined in "create" events
	JobExecutionPlan JobExecutionPlan `json:"JobExecutionPlan,omitempty"`
	// this is only defined in "update_deal" events
	Deal                 Deal               `json:"Deal,omitempty"`
	Status               string             `json:"Status,omitempty" example:"Got results proposal of length: 0"`
	VerificationProposal []byte             `json:"VerificationProposal,omitempty"`
	VerificationResult   VerificationResult `json:"VerificationResult,omitempty"`
	PublishedResult      StorageSpec        `json:"PublishedResult,omitempty"`

	// EventTime is the time of the event; SenderPublicKey is the public key
	// of the sender.
	EventTime       time.Time `json:"EventTime,omitempty" example:"2022-11-17T13:32:55.756658941Z"`
	SenderPublicKey PublicKey `json:"SenderPublicKey,omitempty"`

	// RunOutput of the job; a nil pointer is omitted from the JSON encoding.
	RunOutput *RunCommandResult `json:"RunOutput,omitempty"`
}
   388  
// VerificationResult records the outcome of verifying a shard's results.
//
// we need to use a struct for the result because:
// a) otherwise we don't know if VerificationResult==false
// means "I've not verified yet" or "verification failed"
// b) we might want to add further fields to the result later
type VerificationResult struct {
	// Complete distinguishes "not verified yet" from a finished verification.
	Complete bool `json:"Complete,omitempty"`
	// Result is the verification outcome.
	Result bool `json:"Result,omitempty"`
}
   397  
// JobCreatePayload is the payload for creating a job: the submitting client,
// the API version and the job specification. All three fields carry a
// validate:"required" tag.
type JobCreatePayload struct {
	// the id of the client that is submitting the job
	ClientID string `json:"ClientID,omitempty" validate:"required"`

	// the API version of the job being submitted
	APIVersion string `json:"APIVersion,omitempty" example:"V1beta1" validate:"required"`

	// The specification of this job.
	Spec *Spec `json:"Spec,omitempty" validate:"required"`
}